GigaProjects

← Back to rag-assistant

index_store.py

"""ChromaDB persistence and search helpers."""

from __future__ import annotations

from pathlib import Path


# Name of the Chroma collection that reset_collection() recreates and
# get_collection() opens.
COLLECTION_NAME = "reinsurance_knowledge_base"


def create_client(index_dir: Path):
    """Return a persistent Chroma client whose storage lives in ``index_dir``.

    The directory (including any missing parents) is created before the
    client is constructed; an already existing directory is left as is.

    Args:
        index_dir: Filesystem location of the on-disk index.

    Returns:
        The ``chromadb.PersistentClient`` bound to ``index_dir``.
    """
    # Deferred import: chromadb is only loaded once a client is requested.
    from chromadb import PersistentClient

    index_dir.mkdir(parents=True, exist_ok=True)
    storage_path = str(index_dir)
    return PersistentClient(path=storage_path)


def reset_collection(index_dir: Path):
    """Drop the knowledge-base collection if it exists and create it anew.

    Args:
        index_dir: Directory holding the persistent Chroma index.

    Returns:
        The freshly created, empty collection named ``COLLECTION_NAME``.
    """
    client = create_client(index_dir)

    # Depending on the installed chromadb release, ``list_collections`` yields
    # either collection objects or bare name strings (the 0.6.x behaviour, where
    # touching ``.name`` on an entry raises). Accept both shapes.
    existing_names = {
        entry if isinstance(entry, str) else entry.name
        for entry in client.list_collections()
    }
    if COLLECTION_NAME in existing_names:
        client.delete_collection(COLLECTION_NAME)

    return client.create_collection(name=COLLECTION_NAME)


def get_collection(index_dir: Path):
    """Open the existing knowledge-base collection stored under ``index_dir``.

    A client is obtained through ``create_client`` and the collection is then
    looked up by ``COLLECTION_NAME``; this function does not create it.

    Args:
        index_dir: Directory holding the persistent Chroma index.

    Returns:
        Whatever the client's ``get_collection`` returns for that name.
    """
    return create_client(index_dir).get_collection(name=COLLECTION_NAME)


def add_chunks(collection, chunks: list[dict], embeddings: list[list[float]]) -> None:
    """Write ``chunks`` and their ``embeddings`` to ``collection`` in one call.

    Each chunk contributes its ``"id"``, its ``"searchable_text"`` (stored as
    the document) and the metadata produced by ``metadata_for``. An empty
    ``chunks`` list is a no-op: the collection is not touched at all.

    Args:
        collection: Object exposing an ``add(ids, embeddings, documents,
            metadatas)`` method.
        chunks: Chunk dictionaries to store.
        embeddings: Embedding vectors, passed through unchanged.
    """
    if chunks:
        # Resolve the method first so a bad ``collection`` fails before any
        # chunk field is read.
        store = collection.add
        chunk_ids = [item["id"] for item in chunks]
        texts = [item["searchable_text"] for item in chunks]
        metadata_rows = list(map(metadata_for, chunks))
        store(
            ids=chunk_ids,
            embeddings=embeddings,
            documents=texts,
            metadatas=metadata_rows,
        )


def _first_query_column(result: dict, key: str, size: int) -> list:
    """Return the ``key`` column for the first query, or ``size`` ``None`` fillers.

    The fillers are used when the key is missing, is ``None`` or holds no
    per-query lists, so the column can still be paired with the returned ids.
    """
    batches = result.get(key)
    if batches is None or len(batches) == 0:
        return [None] * size
    return batches[0]


def search(collection, query_embedding: list[float], top_k: int) -> list[dict]:
    """Query ``collection`` with one embedding and return its hits as row dicts.

    Args:
        collection: Object exposing ``query(query_embeddings, n_results)`` that
            returns a mapping of per-query result columns.
        query_embedding: The single embedding vector to search with.
        top_k: Passed to the collection as ``n_results``.

    Returns:
        One dict per returned id with the keys ``"id"``, ``"text"``,
        ``"metadata"`` and ``"distance"``. A field whose column is absent from
        the query result is reported as ``None`` instead of raising.
    """
    result = collection.query(query_embeddings=[query_embedding], n_results=top_k)

    # Only one embedding is sent, so only the first entry of each column matters.
    id_batches = result.get("ids")
    ids = id_batches[0] if id_batches is not None and len(id_batches) > 0 else []

    hit_count = len(ids)
    documents = _first_query_column(result, "documents", hit_count)
    metadatas = _first_query_column(result, "metadatas", hit_count)
    distances = _first_query_column(result, "distances", hit_count)

    # Columns that are present are expected to be position-aligned with ``ids``.
    return [
        {
            "id": chunk_id,
            "text": text,
            "metadata": metadata,
            "distance": distance,
        }
        for chunk_id, text, metadata, distance in zip(
            ids, documents, metadatas, distances
        )
    ]


def metadata_for(chunk: dict) -> dict:
    """Extract the metadata fields stored alongside a chunk.

    Args:
        chunk: Chunk dictionary; it must contain every copied field.

    Returns:
        A new dict holding ``"source"``, ``"relative_path"``,
        ``"document_type"`` and ``"section"`` copied from ``chunk``.

    Raises:
        KeyError: If ``chunk`` lacks one of those fields.
    """
    copied_fields = ("source", "relative_path", "document_type", "section")
    return {field: chunk[field] for field in copied_fields}

Run this code