38 changes: 16 additions & 22 deletions uc-rag/agents.md
@@ -1,31 +1,25 @@
# agents.md — UC-RAG RAG Server
# INSTRUCTIONS:
# 1. Open your AI tool
# 2. Paste the full contents of uc-rag/README.md
# 3. Use this prompt:
# "Read this UC README. Using the R.I.C.E framework, generate an
# agents.md YAML with four fields: role, intent, context, enforcement.
# Enforcement must include every rule listed under
# 'Enforcement Rules Your agents.md Must Include'.
# Output only valid YAML."
# 4. Paste the output below, replacing this placeholder
# 5. Check every enforcement rule against the README before saving

role: >
  A retrieval-augmented policy assistant for city staff. Your operational
  boundary is strictly limited to information found in official municipal
  policy documents covering HR, IT, and Finance.

intent: >
  To provide accurate, cited answers to policy queries. When a query is covered
  by policy, the output must include a clear answer followed by the document
  name and chunk index. When a query is not covered, the output must strictly
  follow the refusal template.

context: >
  Use only the information present in the retrieved chunks provided by the
  RAG system. You are forbidden from using general knowledge or external
  information to supplement your answers.

enforcement:
- "[FILL IN: Chunk size rule]"
- "[FILL IN: Citation rule]"
- "[FILL IN: Similarity threshold + refusal rule]"
- "[FILL IN: Context grounding rule]"
- "[FILL IN: Cross-document rule]"
- "Chunk size must not exceed 400 tokens; never split mid-sentence."
- "Every answer must cite the source document name and chunk index (e.g., [policy_hr_leave, Chunk 2])."
- "If no retrieved chunk scores above similarity threshold 0.6, output the refusal template exactly."
- "Answer must use ONLY information present in the retrieved chunks; never add context from outside the set."
- "If a query spans two documents, retrieve and cite from each separately; never merge chunks from different documents into a single answer."
- "Refusal template: 'This question is not covered in the retrieved policy documents. Retrieved chunks: [list chunk sources]. Please contact the relevant department for guidance.'"
262 changes: 197 additions & 65 deletions uc-rag/rag_server.py
@@ -1,94 +1,210 @@
"""
UC-RAG — RAG Server
rag_server.py — Starter file

Build this using your AI coding tool:
1. Share the contents of agents.md, skills.md, and uc-rag/README.md
2. Ask the AI to implement this file following the enforcement rules
in agents.md and the skill definitions in skills.md
3. Run with: python3 rag_server.py --build-index
4. Then: python3 rag_server.py --query "your question here"

Stack:
pip3 install sentence-transformers chromadb
LLM: set your API key in llm_adapter.py (../uc-mcp/llm_adapter.py)
or set environment variable GEMINI_API_KEY
"""

import argparse
import os
import sys
import re
import chromadb
from sentence_transformers import SentenceTransformer

# Add parent directory to sys.path to import llm_adapter
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "uc-mcp")))
from llm_adapter import call_llm

# Fix UnicodeEncodeError for Windows console
if sys.stdout.encoding and sys.stdout.encoding.lower() != 'utf-8':
    try:
        sys.stdout.reconfigure(encoding='utf-8')
    except AttributeError:
        # Fallback for Python < 3.7, where reconfigure() is unavailable
        import codecs
        sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())

# --- SKILL: chunk_documents ---
def chunk_documents(docs_dir: str, max_tokens: int = 400) -> list[dict]:
"""
Load all .txt files from docs_dir.
Split each into chunks of max_tokens, respecting sentence boundaries.
Return list of: {doc_name, chunk_index, text}

Failure mode to prevent:
- Never split mid-sentence (chunk boundary failure)
- Never exceed max_tokens per chunk
Split each into chunks of max_tokens, respecting sentence and newline boundaries.
"""
raise NotImplementedError(
"Implement chunk_documents using your AI tool.\n"
"Hint: use nltk.sent_tokenize or split on '. ' and accumulate "
"sentences until token limit is reached."
)
    chunks = []
    if not os.path.exists(docs_dir):
        print(f"Error: Directory {docs_dir} not found.")
        return chunks

    for filename in os.listdir(docs_dir):
        if not filename.endswith(".txt"):
            continue
        filepath = os.path.join(docs_dir, filename)
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()

        # Split on section headers AND numbered items (e.g., "5.", "5.1", "5.2").
        # Splitting at numbered boundaries keeps chunks focused; policy items
        # are assumed to stay well under max_tokens.
        items = re.split(r'\n+(?=\d+(?:\.\d+)?\s+)', content)

        current_section = ""
        for i, item_text in enumerate(items):
            item_text = item_text.strip()
            if not item_text:
                continue

            # Update the current section when this item is a main header
            header_match = re.match(r'^(\d+)\.\s+([A-Z\s]{3,})', item_text)
            if header_match:
                current_section = item_text.split('\n')[0]

            # Prepend a breadcrumb so each chunk carries its own context
            breadcrumb = f"Policy: {filename} > Section: {current_section}"
            if not header_match and re.match(r'^\d+\.\d+\s+', item_text):
                item_num = item_text.split()[0]
                breadcrumb += f" > Item: {item_num}"

            chunk_text = f"{breadcrumb}\n{item_text}"

            chunks.append({
                "doc_name": filename,
                "chunk_index": str(i),
                "text": chunk_text
            })

    return chunks
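

# A tiny, self-contained illustration of the splitter (hypothetical text; not
# called anywhere — run by hand via
# `python3 -c "import rag_server; rag_server._demo_split()"`):
def _demo_split():
    sample = ("1. LEAVE POLICY\nGeneral rules.\n"
              "5.1 Casual leave is capped at 12 days.\n"
              "5.2 LWP requires Department Head approval.")
    # Yields one piece per header/numbered item, never cutting mid-sentence
    for piece in re.split(r'\n+(?=\d+(?:\.\d+)?\s+)', sample):
        print(repr(piece))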


# --- SKILL: retrieve_and_answer ---
def retrieve_and_answer(
    query: str,
    collection,      # ChromaDB collection
    embedder,        # SentenceTransformer model
    llm_call,        # callable: (prompt: str) -> str
    top_k: int = 3,
    threshold: float = 0.6,
    context_only: bool = False
) -> dict:
    """
    Embed the query, retrieve top_k chunks from ChromaDB, filter out chunks
    below the similarity threshold, and generate an answer grounded strictly
    in the surviving chunks. If no chunk passes, return the refusal template.
    Return: {answer, cited_chunks: [{doc_name, chunk_index, score}]}
    """
    # Query expansion for common terms
    expanded_query = f"Policy Section Item {query.lower()}"
    if "leave without pay" in expanded_query or "lwp" in expanded_query:
        expanded_query += " LWP approval Department Head HR Director"
    if "phone" in expanded_query:
        expanded_query += " device mobile BYOD personal"
    if "work files" in expanded_query or "work data" in expanded_query:
        expanded_query += " CMC data restricted confidential storage"

    query_embedding = embedder.encode([expanded_query])[0].tolist()

    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k,
        include=["documents", "metadatas", "distances"]
    )

    valid_chunks = []
    for i in range(len(results['ids'][0])):
        # ChromaDB cosine distance = 1 - cosine_similarity
        similarity = 1 - results['distances'][0][i]
        if similarity >= threshold:
            valid_chunks.append({
                "text": results['documents'][0][i],
                "doc_name": results['metadatas'][0][i]['doc_name'],
                "chunk_index": results['metadatas'][0][i]['chunk_index'],
                "score": similarity
            })

    if not valid_chunks:
        refusal = (
            "This question is not covered in the retrieved policy documents. "
            "Retrieved chunks: [None above threshold]. Please contact the relevant "
            "department for guidance."
        )
        return {"answer": refusal, "cited_chunks": []}

    # Grounded prompt: the LLM sees ONLY the retrieved chunks as context
    context_text = "\n\n".join([
        f"--- Source: {c['doc_name']} (Chunk {c['chunk_index']}) ---\n{c['text']}"
        for c in valid_chunks
    ])

prompt = f"""You are a Retrieval-Augmented Generation (RAG) assistant for the City Municipal Corporation.

GROUNDING RULES:
1. ANSWER ONLY using the provided "Retrieved Context" below.
2. CITATION: Every claim must be followed by a citation in brackets, e.g., [{valid_chunks[0]['doc_name']}, Chunk {valid_chunks[0]['chunk_index']}].
3. REFUSAL: If the answer is not contained within the context, you must state that it is not covered.
4. NO EXTERNAL KNOWLEDGE: Use only the provided documents.

Retrieved Context:
{context_text}

User Query: {query}

Answer:"""

if context_only:
return {"answer": "[CONTEXT ONLY MODE] LLM call skipped.", "cited_chunks": valid_chunks}

answer = llm_call(prompt)
return {"answer": answer, "cited_chunks": valid_chunks}


# --- INDEX BUILDER ---
def build_index(docs_dir: str, db_path: str = "./chroma_db"):
"""
Chunk all documents and store embeddings in ChromaDB.
Called once before querying.
"""
raise NotImplementedError(
"Implement build_index using your AI tool.\n"
"Hint: call chunk_documents(), embed each chunk with "
"SentenceTransformer, upsert into ChromaDB collection."
print(f"Loading documents from {docs_dir}...")
chunks = chunk_documents(docs_dir)
print(f"Created {len(chunks)} chunks.")

client = chromadb.PersistentClient(path=db_path)

# Clear existing collection to prevent duplicates
try:
client.delete_collection(name="policy_docs")
print("Cleared existing collection.")
except Exception:
pass

# Use cosine similarity for the threshold logic
collection = client.create_collection(
name="policy_docs",
metadata={"hnsw:space": "cosine"}
)

embedder = SentenceTransformer('all-MiniLM-L6-v2')

print("Embedding and indexing chunks...")
for i, chunk in enumerate(chunks):
embedding = embedder.encode([chunk['text']])[0].tolist()
collection.add(
ids=[f"chunk_{i}"],
embeddings=[embedding],
metadatas=[{
"doc_name": chunk['doc_name'],
"chunk_index": chunk['chunk_index']
}],
documents=[chunk['text']]
)
print("Indexing complete.")


# --- NAIVE MODE ---
def naive_query(query: str, docs_dir: str, llm_call):
    """
    Load all documents into context WITHOUT retrieval or rules.
    Run this before building the index to observe the failure modes
    a grounded RAG pipeline prevents.
    """
all_text = ""
for filename in os.listdir(docs_dir):
if filename.endswith(".txt"):
with open(os.path.join(docs_dir, filename), "r", encoding="utf-8") as f:
all_text += f"\n\n--- {filename} ---\n" + f.read()

prompt = f"""Answer this query using the documents below.

Documents:
{all_text}

Query: {query}
"""
return llm_call(prompt)


# --- MAIN ---
@@ -100,6 +216,10 @@ def main():
help="Query the RAG server")
parser.add_argument("--naive", action="store_true",
help="Run naive (no retrieval) mode to see failures")
parser.add_argument("--context-only", action="store_true",
help="Only show retrieved context, do not call LLM")
parser.add_argument("--threshold", type=float, default=0.6,
help="Similarity threshold (default: 0.6)")
parser.add_argument("--docs-dir", type=str,
default="../data/policy-documents",
help="Path to policy documents directory")
@@ -113,22 +233,34 @@ def main():
        sys.exit(1)

    if args.build_index:
        build_index(args.docs_dir, args.db_path)
        print("Done.")

    if args.query:
        if args.naive:
            result = naive_query(args.query, args.docs_dir, call_llm)
            print(f"\nNaive answer:\n{result}")
        else:
            # Full RAG query: load the persisted index and answer with citations
            client = chromadb.PersistentClient(path=args.db_path)
            collection = client.get_collection(name="policy_docs")
            embedder = SentenceTransformer('all-MiniLM-L6-v2')

            result = retrieve_and_answer(
                args.query,
                collection,
                embedder,
                call_llm,
                threshold=args.threshold,
                context_only=args.context_only
            )

            print(f"\nRAG Answer:\n{result['answer']}")
            if result['cited_chunks']:
                print("\nCited Chunks (Evidence):")
                for c in result['cited_chunks']:
                    print(f"\n--- {c['doc_name']} (Chunk {c['chunk_index']}) | Similarity: {c['score']:.4f} ---")
                    print(c['text'])


if __name__ == "__main__":
    main()
16 changes: 8 additions & 8 deletions uc-rag/skills.md
@@ -13,13 +13,13 @@

skills:
  - name: chunk_documents
    description: "Loads policy documents and splits them into sentence-aware chunks."
    input: "Directory path containing .txt policy documents."
    output: "A list of dictionaries, each containing doc_name, chunk_index, and the chunk text."
    error_handling: "If the directory or files are inaccessible, the error is reported and an empty list is returned, so the indexer does not crash."

  - name: retrieve_and_answer
    description: "Embeds a query, retrieves the most relevant chunks from ChromaDB, and generates an answer grounded strictly in those chunks."
    input: "A natural language query string."
    output: "An answer string plus a list of cited chunk metadata (document name, chunk index, and similarity score)."
    error_handling: "If no chunk meets the similarity threshold (0.6), the standard refusal template is returned instead of a fallback answer."