"""Build the local ChromaDB retrieval index."""
from __future__ import annotations
import sys
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(PROJECT_ROOT))
from src.chunking import chunk_documents
from src.config import CORPUS_DIR, INDEX_DIR, get_embedding_model_name
from src.documents import load_documents
from src.embeddings import embed_texts
from src.index_store import add_chunks, reset_collection
def main() -> None:
    """Rebuild the retrieval index from the corpus directory.

    Runs the pipeline in order: load documents from ``CORPUS_DIR``, split
    them into chunks, embed each chunk's ``"searchable_text"`` field, reset
    the collection at ``INDEX_DIR`` and add the chunks with their
    embeddings. Progress is reported on stdout. When chunking yields
    nothing, the function returns before embedding and before the
    collection is reset.
    """
    print(f"Loading documents from {CORPUS_DIR}")
    documents = load_documents(CORPUS_DIR)
    print(f"Loaded {len(documents)} documents")

    chunks = chunk_documents(documents)
    print(f"Created {len(chunks)} chunks")

    # Nothing to embed or store: bail out before touching the index.
    if not chunks:
        print("No chunks to index")
        return

    print(f"Embedding chunks with {get_embedding_model_name()}")
    searchable_texts = [chunk["searchable_text"] for chunk in chunks]
    vectors = embed_texts(searchable_texts)

    # The collection is reset only after embedding has completed.
    print(f"Writing ChromaDB index to {INDEX_DIR}")
    store = reset_collection(INDEX_DIR)
    add_chunks(store, chunks, vectors)

    print(f"Indexed {len(chunks)} chunks")
# Run the index build only when this file is executed as a script, so that
# importing the module does not trigger it.
if __name__ == "__main__":
    main()