38 changes: 16 additions & 22 deletions uc-rag/agents.md
@@ -1,31 +1,25 @@
# agents.md — UC-RAG RAG Server
# INSTRUCTIONS:
# 1. Open your AI tool
# 2. Paste the full contents of uc-rag/README.md
# 3. Use this prompt:
# "Read this UC README. Using the R.I.C.E framework, generate an
# agents.md YAML with four fields: role, intent, context, enforcement.
# Enforcement must include every rule listed under
# 'Enforcement Rules Your agents.md Must Include'.
# Output only valid YAML."
# 4. Paste the output below, replacing this placeholder
# 5. Check every enforcement rule against the README before saving

role: >
  A retrieval-augmented policy assistant for city staff. Your operational
  boundary is strictly limited to information found in official municipal
  policy documents covering HR, IT, and Finance.

intent: >
  To provide accurate, cited answers to policy queries. When a query is covered
  by policy, the output must include a clear answer followed by the document
  name and chunk index. When a query is not covered, the output must strictly
  follow the refusal template.

context: >
  Use only the information present in the retrieved chunks provided by the
  RAG system. You are forbidden from using general knowledge or external
  information to supplement your answers.

enforcement:
- "[FILL IN: Chunk size rule]"
- "[FILL IN: Citation rule]"
- "[FILL IN: Similarity threshold + refusal rule]"
- "[FILL IN: Context grounding rule]"
- "[FILL IN: Cross-document rule]"
- "Chunk size must not exceed 400 tokens; never split mid-sentence."
- "Every answer must cite the source document name and chunk index (e.g., [policy_hr_leave, Chunk 2])."
- "If no retrieved chunk scores above similarity threshold 0.6, output the refusal template exactly."
- "Answer must use ONLY information present in the retrieved chunks; never add context from outside the set."
- "If a query spans two documents, retrieve and cite from each separately; never merge chunks from different documents into a single answer."
- "Refusal template: 'This question is not covered in the retrieved policy documents. Retrieved chunks: [list chunk sources]. Please contact the relevant department for guidance.'"
262 changes: 197 additions & 65 deletions uc-rag/rag_server.py
@@ -1,94 +1,210 @@
"""
UC-RAG — RAG Server
rag_server.py — Starter file

Build this using your AI coding tool:
1. Share the contents of agents.md, skills.md, and uc-rag/README.md
2. Ask the AI to implement this file following the enforcement rules
in agents.md and the skill definitions in skills.md
3. Run with: python3 rag_server.py --build-index
4. Then: python3 rag_server.py --query "your question here"

Stack:
pip3 install sentence-transformers chromadb
LLM: set your API key in llm_adapter.py (../uc-mcp/llm_adapter.py)
or set environment variable GEMINI_API_KEY
"""

import argparse
import os
import sys
import re
import chromadb
from sentence_transformers import SentenceTransformer

# Add parent directory to sys.path to import llm_adapter
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "uc-mcp")))
from llm_adapter import call_llm

# Fix UnicodeEncodeError for Windows console
if sys.stdout.encoding and sys.stdout.encoding.lower() != 'utf-8':
    try:
        sys.stdout.reconfigure(encoding='utf-8')
    except AttributeError:
        # Fallback for Python < 3.7, where reconfigure() is unavailable
        import codecs
        sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())

# --- SKILL: chunk_documents ---
def chunk_documents(docs_dir: str, max_tokens: int = 400) -> list[dict]:
"""
Load all .txt files from docs_dir.
Split each into chunks of max_tokens, respecting sentence boundaries.
Return list of: {doc_name, chunk_index, text}

Failure mode to prevent:
- Never split mid-sentence (chunk boundary failure)
- Never exceed max_tokens per chunk
Split each into chunks of max_tokens, respecting sentence and newline boundaries.
"""
raise NotImplementedError(
"Implement chunk_documents using your AI tool.\n"
"Hint: use nltk.sent_tokenize or split on '. ' and accumulate "
"sentences until token limit is reached."
)
    chunks = []
    if not os.path.exists(docs_dir):
        print(f"Error: Directory {docs_dir} not found.")
        return chunks

    for filename in os.listdir(docs_dir):
        if not filename.endswith(".txt"):
            continue
        filepath = os.path.join(docs_dir, filename)
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()

        # Split on section headers AND numbered items (e.g., "5.", "5.1", "5.2").
        # Splitting at numbered boundaries keeps chunks focused; policy items
        # are assumed to stay well under max_tokens.
        items = re.split(r'\n+(?=\d+(?:\.\d+)?\s+)', content)

        current_section = ""
        for i, item_text in enumerate(items):
            item_text = item_text.strip()
            if not item_text:
                continue

            # Update the current section when this item is a main header
            header_match = re.match(r'^(\d+)\.\s+([A-Z\s]{3,})', item_text)
            if header_match:
                current_section = item_text.split('\n')[0]

            # Prepend a breadcrumb so each chunk carries its own context
            breadcrumb = f"Policy: {filename} > Section: {current_section}"
            if not header_match and re.match(r'^\d+\.\d+\s+', item_text):
                item_num = item_text.split()[0]
                breadcrumb += f" > Item: {item_num}"

            chunk_text = f"{breadcrumb}\n{item_text}"

            chunks.append({
                "doc_name": filename,
                "chunk_index": str(i),
                "text": chunk_text
            })

    return chunks
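

# A tiny, self-contained illustration of the splitter (hypothetical text; not
# called anywhere — run by hand via
# `python3 -c "import rag_server; rag_server._demo_split()"`):
def _demo_split():
    sample = ("1. LEAVE POLICY\nGeneral rules.\n"
              "5.1 Casual leave is capped at 12 days.\n"
              "5.2 LWP requires Department Head approval.")
    # Yields one piece per header/numbered item, never cutting mid-sentence
    for piece in re.split(r'\n+(?=\d+(?:\.\d+)?\s+)', sample):
        print(repr(piece))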


# --- SKILL: retrieve_and_answer ---
def retrieve_and_answer(
    query: str,
    collection,      # ChromaDB collection
    embedder,        # SentenceTransformer model
    llm_call,        # callable: (prompt: str) -> str
    top_k: int = 3,
    threshold: float = 0.6,
    context_only: bool = False
) -> dict:
    """
    Embed the query, retrieve top_k chunks from ChromaDB, filter out chunks
    below the similarity threshold, and generate an answer grounded strictly
    in the surviving chunks. If no chunk passes, return the refusal template.
    Return: {answer, cited_chunks: [{doc_name, chunk_index, score}]}
    """
    # Query expansion for common terms
    expanded_query = f"Policy Section Item {query.lower()}"
    if "leave without pay" in expanded_query or "lwp" in expanded_query:
        expanded_query += " LWP approval Department Head HR Director"
    if "phone" in expanded_query:
        expanded_query += " device mobile BYOD personal"
    if "work files" in expanded_query or "work data" in expanded_query:
        expanded_query += " CMC data restricted confidential storage"

    query_embedding = embedder.encode([expanded_query])[0].tolist()

    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k,
        include=["documents", "metadatas", "distances"]
    )

    valid_chunks = []
    for i in range(len(results['ids'][0])):
        # ChromaDB cosine distance = 1 - cosine_similarity
        similarity = 1 - results['distances'][0][i]
        if similarity >= threshold:
            valid_chunks.append({
                "text": results['documents'][0][i],
                "doc_name": results['metadatas'][0][i]['doc_name'],
                "chunk_index": results['metadatas'][0][i]['chunk_index'],
                "score": similarity
            })

    if not valid_chunks:
        refusal = (
            "This question is not covered in the retrieved policy documents. "
            "Retrieved chunks: [None above threshold]. Please contact the relevant "
            "department for guidance."
        )
        return {"answer": refusal, "cited_chunks": []}

    # Grounded prompt: the LLM sees ONLY the retrieved chunks as context
    context_text = "\n\n".join([
        f"--- Source: {c['doc_name']} (Chunk {c['chunk_index']}) ---\n{c['text']}"
        for c in valid_chunks
    ])

prompt = f"""You are a Retrieval-Augmented Generation (RAG) assistant for the City Municipal Corporation.

GROUNDING RULES:
1. ANSWER ONLY using the provided "Retrieved Context" below.
2. CITATION: Every claim must be followed by a citation in brackets, e.g., [{valid_chunks[0]['doc_name']}, Chunk {valid_chunks[0]['chunk_index']}].
3. REFUSAL: If the answer is not contained within the context, you must state that it is not covered.
4. NO EXTERNAL KNOWLEDGE: Use only the provided documents.

Retrieved Context:
{context_text}

User Query: {query}

Answer:"""

if context_only:
return {"answer": "[CONTEXT ONLY MODE] LLM call skipped.", "cited_chunks": valid_chunks}

answer = llm_call(prompt)
return {"answer": answer, "cited_chunks": valid_chunks}


# --- INDEX BUILDER ---
def build_index(docs_dir: str, db_path: str = "./chroma_db"):
"""
Chunk all documents and store embeddings in ChromaDB.
Called once before querying.
"""
raise NotImplementedError(
"Implement build_index using your AI tool.\n"
"Hint: call chunk_documents(), embed each chunk with "
"SentenceTransformer, upsert into ChromaDB collection."
print(f"Loading documents from {docs_dir}...")
chunks = chunk_documents(docs_dir)
print(f"Created {len(chunks)} chunks.")

client = chromadb.PersistentClient(path=db_path)

# Clear existing collection to prevent duplicates
try:
client.delete_collection(name="policy_docs")
print("Cleared existing collection.")
except Exception:
pass

# Use cosine similarity for the threshold logic
collection = client.create_collection(
name="policy_docs",
metadata={"hnsw:space": "cosine"}
)

embedder = SentenceTransformer('all-MiniLM-L6-v2')

print("Embedding and indexing chunks...")
for i, chunk in enumerate(chunks):
embedding = embedder.encode([chunk['text']])[0].tolist()
collection.add(
ids=[f"chunk_{i}"],
embeddings=[embedding],
metadatas=[{
"doc_name": chunk['doc_name'],
"chunk_index": chunk['chunk_index']
}],
documents=[chunk['text']]
)
print("Indexing complete.")


# --- NAIVE MODE ---
def naive_query(query: str, docs_dir: str, llm_call):
    """
    Load all documents into context WITHOUT retrieval or rules.
    Run this before building the index to observe the failure modes
    a grounded RAG pipeline prevents.
    """
all_text = ""
for filename in os.listdir(docs_dir):
if filename.endswith(".txt"):
with open(os.path.join(docs_dir, filename), "r", encoding="utf-8") as f:
all_text += f"\n\n--- {filename} ---\n" + f.read()

prompt = f"""Answer this query using the documents below.

Documents:
{all_text}

Query: {query}
"""
return llm_call(prompt)


# --- MAIN ---
@@ -100,6 +216,10 @@ def main():
help="Query the RAG server")
parser.add_argument("--naive", action="store_true",
help="Run naive (no retrieval) mode to see failures")
parser.add_argument("--context-only", action="store_true",
help="Only show retrieved context, do not call LLM")
parser.add_argument("--threshold", type=float, default=0.6,
help="Similarity threshold (default: 0.6)")
parser.add_argument("--docs-dir", type=str,
default="../data/policy-documents",
help="Path to policy documents directory")
@@ -113,22 +233,34 @@ def main():
        sys.exit(1)

    if args.build_index:
        build_index(args.docs_dir, args.db_path)
        print("Done.")

    if args.query:
        if args.naive:
            result = naive_query(args.query, args.docs_dir, call_llm)
            print(f"\nNaive answer:\n{result}")
        else:
            # Full RAG query: load the persisted index and answer with citations
            client = chromadb.PersistentClient(path=args.db_path)
            collection = client.get_collection(name="policy_docs")
            embedder = SentenceTransformer('all-MiniLM-L6-v2')

            result = retrieve_and_answer(
                args.query,
                collection,
                embedder,
                call_llm,
                threshold=args.threshold,
                context_only=args.context_only
            )

            print(f"\nRAG Answer:\n{result['answer']}")
            if result['cited_chunks']:
                print("\nCited Chunks (Evidence):")
                for c in result['cited_chunks']:
                    print(f"\n--- {c['doc_name']} (Chunk {c['chunk_index']}) | Similarity: {c['score']:.4f} ---")
                    print(c['text'])


if __name__ == "__main__":
    main()
16 changes: 8 additions & 8 deletions uc-rag/skills.md
@@ -13,13 +13,13 @@

skills:
  - name: chunk_documents
    description: "Loads policy documents and splits them into sentence-aware chunks."
    input: "Directory path containing .txt policy documents."
    output: "A list of dictionaries, each containing doc_name, chunk_index, and the chunk text."
    error_handling: "If the directory or files are inaccessible, the error is reported and an empty list is returned, so the indexer does not crash."

  - name: retrieve_and_answer
    description: "Embeds a query, retrieves the most relevant chunks from ChromaDB, and generates an answer grounded strictly in those chunks."
    input: "A natural language query string."
    output: "An answer string plus a list of cited chunk metadata (document name, chunk index, and similarity score)."
    error_handling: "If no chunk meets the similarity threshold (0.6), the standard refusal template is returned instead of a fallback answer."