GigaProjects

← Back to rag-assistant

inspect_retrieval.py

"""Print retrieved chunks for one question."""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

# Put the directory one level above this script's folder at the front of
# sys.path so the `src` package import below resolves when the script is run
# directly (presumably that directory is the project root -- confirm layout).
PROJECT_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(PROJECT_ROOT))

from src.retrieval import retrieve


def main() -> None:
    """Run one retrieval from the command line and print each hit.

    The question and ``--top-k`` come from ``parse_args`` and are handed to
    ``retrieve``. Every returned result is printed as a numbered entry with
    its source, section, document type, distance (four decimals) and a
    whitespace-collapsed preview of its text. ``No results`` is printed when
    retrieval returns nothing.
    """
    cli = parse_args()
    hits = retrieve(cli.question, top_k=cli.top_k)

    if hits:
        for position, hit in enumerate(hits, start=1):
            meta = hit["metadata"]
            snippet = make_preview(hit["text"])

            print(f"{position}. {meta['source']}")
            print(f"   section: {meta['section']}")
            print(f"   type: {meta['document_type']}")
            print(f"   distance: {hit['distance']:.4f}")
            # The doubled newline leaves a blank separator line after each entry.
            print(f"   preview: {snippet}", end="\n\n")
    else:
        print("No results")


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Inspect retrieval results.")
    parser.add_argument("question", help="Question to retrieve evidence for.")
    parser.add_argument("--top-k", type=int, default=6)
    return parser.parse_args()


def make_preview(text: str, max_length: int = 350) -> str:
    """Return *text* as a single-line preview.

    Runs of whitespace (including newlines) are collapsed to single spaces
    and leading/trailing whitespace is dropped. If the collapsed text is
    longer than *max_length* characters, it is cut to that many characters,
    stripped of trailing whitespace, and suffixed with ``...``; the suffix is
    not counted against *max_length*.
    """
    flattened = " ".join(text.split())
    fits = len(flattened) <= max_length
    return flattened if fits else f"{flattened[:max_length].rstrip()}..."


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Run this code