src/generation.py - rag-assistant

generation.py

"""Generate grounded answers from retrieved chunks."""

from __future__ import annotations

from src.config import (
    get_generation_model,
    get_openrouter_api_key,
    get_openrouter_base_url,
)


def generate_answer(question: str, retrieved_chunks: list[dict]) -> str:
    """Generate an answer to *question* grounded in *retrieved_chunks*.

    The function never raises for the failure modes it knows about; it
    returns a plain-text fallback instead:

    * no retrieved chunks -> ``no_context_answer()``
    * no OpenRouter API key -> ``missing_api_key_answer(...)``
    * the completion request raises -> ``api_error_answer(...)``
    * the response carries no usable text -> a fixed "empty answer" message

    Args:
        question: The user's question, inserted verbatim into the prompt.
        retrieved_chunks: Chunk dicts; each is read via its ``"metadata"``
            and ``"text"`` keys by the prompt/fallback builders.

    Returns:
        The model's answer with surrounding whitespace removed, or one of
        the fallback messages described above.
    """
    empty_answer = "The model returned an empty answer."

    if not retrieved_chunks:
        return no_context_answer()

    api_key = get_openrouter_api_key()
    if not api_key:
        return missing_api_key_answer(retrieved_chunks)

    client = create_openrouter_client(api_key)
    prompt = build_prompt(question, retrieved_chunks)
    model = get_generation_model()

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a careful internal knowledge-base assistant. "
                        "You answer only from the provided context."
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
        )
    except Exception as exc:
        # Boundary with the remote service: any failure becomes a readable
        # fallback that still lists the retrieved sources.
        return api_error_answer(exc, retrieved_chunks)

    # A successful HTTP call can still come back without any choices
    # (empty list or None); indexing it blindly would raise here, outside
    # the try block above.
    choices = getattr(response, "choices", None)
    if not choices:
        return empty_answer

    message = choices[0].message
    content = message.content if message is not None else None
    if not content:
        return empty_answer

    # Whitespace-only completions are as useless as empty ones.
    answer = content.strip()
    if not answer:
        return empty_answer

    return answer


def create_openrouter_client(api_key: str):
    """Return an ``openai.OpenAI`` client pointed at the configured OpenRouter base URL."""
    # Imported inside the function, so the openai package is only loaded
    # when a client is actually created.
    from openai import OpenAI

    client_options = {
        "api_key": api_key,
        "base_url": get_openrouter_base_url(),
        "timeout": 60,
    }
    return OpenAI(**client_options)


def build_prompt(question: str, retrieved_chunks: list[dict]) -> str:
    """Assemble the user prompt: instructions, rules, question, context, answer format.

    The context section is produced by ``build_context(retrieved_chunks)``.
    Sections are separated by a single blank line.
    """
    rendered_context = build_context(retrieved_chunks)

    rules = (
        "- Use only facts that appear in the context.",
        "- If the context does not contain the answer, say that the corpus does not contain enough information.",
        '- If the context does not contain the answer, use "Sources: - none" and "Confidence: low".',
        "- Cite only sources that directly support the answer.",
        "- Cite the document filename and section for each source.",
        "- If sources disagree, explain the disagreement.",
        "- If earlier guidance differs from a final document, answer that there was conflicting or superseded guidance.",
        "- Prefer final, signed, or newer documents over kickoff notes, early memos, or preliminary extracts.",
        "- Include important supporting details such as amounts, percentages, reasons for changes, roles, experience, and replacement contacts when the context provides them.",
        "- For temporal or contradiction questions, mention the earlier fact, the newer/final fact, and the reason or replacement if available.",
        "- Keep the answer concise.",
    )

    # Each entry is one paragraph of the prompt; paragraphs are joined
    # with a blank line between them.
    sections = (
        "Answer the question using only the context below.",
        "Rules:\n" + "\n".join(rules),
        f"Question:\n{question}",
        f"Context:\n{rendered_context}",
        "Answer format:\nAnswer:\n...",
        "Sources:\n- filename, section",
        "Confidence:\nhigh / medium / low",
    )
    return "\n\n".join(sections)


def build_context(retrieved_chunks: list[dict]) -> str:
    """Render the retrieved chunks as numbered source blocks.

    Each block lists the chunk's document, section and document type
    (read from ``chunk["metadata"]``) followed by ``chunk["text"]``.
    Blocks are numbered from 1 and separated by a blank line; an empty
    input yields an empty string.
    """

    def render(position: int, chunk: dict) -> str:
        # Header lines come from the metadata; the chunk text follows "Text:".
        meta = chunk["metadata"]
        header = (
            f"[Source {position}]\n"
            f"Document: {meta['source']}\n"
            f"Section: {meta['section']}\n"
            f"Document type: {meta['document_type']}\n"
            "Text:\n"
        )
        return header + chunk["text"]

    return "\n\n".join(
        render(position, chunk)
        for position, chunk in enumerate(retrieved_chunks, start=1)
    )


def no_context_answer() -> str:
    """Return the fixed low-confidence answer used when nothing was retrieved."""
    return (
        "Answer:\n"
        "The corpus does not contain enough information to answer this question.\n"
        "\n"
        "Sources:\n"
        "- none\n"
        "\n"
        "Confidence:\n"
        "low"
    )


def missing_api_key_answer(retrieved_chunks: list[dict]) -> str:
    """Return a low-confidence answer that only lists the retrieved sources.

    Used when no OpenRouter API key is available; each chunk contributes
    one ``- source, section`` bullet taken from its metadata.
    """
    source_bullets = [
        f"- {chunk['metadata']['source']}, {chunk['metadata']['section']}"
        for chunk in retrieved_chunks
    ]

    opening = [
        "Answer:",
        "OpenRouter is not configured, so I can only show the retrieved sources.",
        "",
        "Sources:",
    ]
    closing = ["", "Confidence:", "low"]

    return "\n".join(opening + source_bullets + closing)


def api_error_answer(error: Exception, retrieved_chunks: list[dict]) -> str:
    """Return a low-confidence answer reporting that generation failed.

    Only the exception's class name is included in the message, followed
    by one ``- source, section`` bullet per retrieved chunk.
    """
    report = [
        "Answer:",
        "The relevant sources were retrieved, but answer generation failed.",
        f"Error: {type(error).__name__}",
        "",
        "Sources retrieved:",
    ]

    for chunk in retrieved_chunks:
        info = chunk["metadata"]
        report.append(f"- {info['source']}, {info['section']}")

    report.extend(("", "Confidence:", "low"))
    return "\n".join(report)
GigaProjects

generation.py

Run this code