-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathingest.py
More file actions
38 lines (24 loc) · 1 KB
/
Copy pathingest.py
File metadata and controls
38 lines (24 loc) · 1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from rich import print
from rag.loader import load_documents
from rag.index import build_index
def ingest_command(path: str):
    """Load documents from ``path``, build an index over them, and return it.

    Progress messages and a short ingestion summary are printed using Rich
    console markup along the way.

    Args:
        path: Location handed to ``load_documents``. It is also echoed in the
            progress output, escaped so it is displayed literally.

    Returns:
        The object returned by ``build_index`` for the loaded chunks.

    Raises:
        ValueError: If ``load_documents`` returns no chunks.
    """
    # Imported locally so this function carries its own dependency; rich is
    # already required by this module for ``print``.
    from rich.markup import escape

    print("[green]Starting ingestion ...[/green]")

    # load documents and split into chunks
    # The path is user input: without escaping, a segment such as "[archive]"
    # is parsed as a markup tag (and dropped from the output), and something
    # like "[/old]" raises MarkupError before any work is done.
    print(f"[cyan]Loading documents from: {escape(str(path))}[/cyan]")
    chunks, num_files = load_documents(path)

    if not chunks:
        raise ValueError("No documents were loaded. Check input directory.")

    num_chunks = len(chunks)
    print(f"[cyan]Loaded {num_chunks} chunks from {num_files} files.[/cyan]")

    # ingestion summary (important for debugging RAG quality)
    print("[yellow]Ingestion Summary:[/yellow]")
    print(f"[yellow]- Files processed: {num_files}[/yellow]")
    print(f"[yellow]- Chunks created: {num_chunks}[/yellow]")

    # build index
    print("[cyan]Building index...[/cyan]")
    index = build_index(chunks)
    print("[cyan]Index was built successfully![/cyan]")

    print("[green]Ingestion complete![/green]")
    return index