Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/services-to-build.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

# Core Application Services
frontend
chat_service
embedder_service
pdf_processor_service
pdf_extraction_service
Expand Down
12 changes: 6 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ help:
@echo ""
@echo "Single-service commands:"
@echo " make install Install chart (CHART_NAME, ENV)"
@echo " e.g. make install CHART_NAME=pdf-extraction-service ENV=staging"
@echo " e.g. make install CHART_NAME=chat-service ENV=staging"
@echo " make upgrade Upgrade chart (CHART_NAME, ENV)"
@echo " e.g. make upgrade CHART_NAME=embedder-service ENV=prod"
@echo " make uninstall Uninstall chart (CHART_NAME)"
Expand All @@ -39,9 +39,9 @@ help:
@echo " e.g. make lint CHART_NAME=embedder-service"
@echo " make lint-all Lint all charts under ./helm/"
@echo " make status Show status of Helm release (CHART_NAME)"
@echo " e.g. make status CHART_NAME=pdf-extraction-service"
@echo " e.g. make status CHART_NAME=chat-service"
@echo " make port-forward Port-forward a pod to local machine"
@echo " e.g. make port-forward CHART_NAME=pdf-extraction-service LOCAL_PORT=8000 REMOTE_PORT=8000"
@echo " e.g. make port-forward CHART_NAME=chat-service LOCAL_PORT=8000 REMOTE_PORT=8000"
@echo ""
@echo "Multi-service commands:"
@echo " make install-all Install all charts (ENV)"
Expand All @@ -61,12 +61,12 @@ help:
@echo "Development Environment:"
@echo " Use docker-compose for local development:"
@echo " docker-compose up -d # Start all services locally"
@echo " docker-compose logs -f pdf_processor_service # View service logs"
@echo " docker-compose logs -f chat_service # View service logs"
@echo ""
@echo "⚠️ IMPORTANT:"
@echo " Avoid underscores (_) in CHART_NAME or release names."
@echo " Use hyphens (-) instead to follow Kubernetes naming conventions (RFC 1123)."
@echo " Example: use pdf-extraction-service ✅, not pdf_extraction_service ❌"
@echo " Example: use chat-service ✅, not chat_service ❌"

## Install a single Helm chart
install:
Expand Down Expand Up @@ -162,7 +162,7 @@ uninstall-all:
port-forward:
ifeq ($(CHART_NAME),example-service)
@echo "ERROR: CHART_NAME must be specified. Example usage:"
@echo " make port-forward CHART_NAME=pdf-extraction-service LOCAL_PORT=3000 REMOTE_PORT=8000"
@echo " make port-forward CHART_NAME=chat-service LOCAL_PORT=3000 REMOTE_PORT=8000"
@exit 1
else
kubectl --namespace $(NAMESPACE) port-forward \
Expand Down
41 changes: 21 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
> [!NOTE]
> Thank you for visiting! This project is currently a work in progress. Features, documentation, and deployment configurations are actively being developed and may change frequently.

OmniPDF is a PDF analyzer capable of translation, summarization, and captioning.
OmniPDF is a PDF analyzer that supports translation, summarization, captioning, and chat-style conversation through Retrieval-Augmented Generation (RAG).

## Architecture

Expand All @@ -12,7 +12,7 @@ OmniPDF is a PDF analyzer capable of translation, summarization, and captioning.
OmniPDF follows a **microservices architecture** with **centralized orchestration**:

- **pdf-processor-service**: Main hub that coordinates all processing workflows
- **Processing services**: Specialized services for extraction, translation, rendering, and embedding
- **Processing services**: Specialized services for extraction, translation, rendering, embedding, and chat
- **Data layer**: Redis (sessions), ChromaDB (vectors), MinIO (files)
- **AI/ML layer**: vLLM text and vision-language models
- **Service mesh layer**: Istio for mTLS, traffic management, and observability (prestaging/staging/production)
Expand Down Expand Up @@ -45,14 +45,14 @@ docker compose -f docker-compose.gpu.yml up --build
### Kubernetes/OpenShift (Helm)
```bash
# Deploy individual service with explicit environment
helm install pdf-extraction-service ./helm/pdf-extraction-service \
--values ./helm/pdf-extraction-service/values-prestaging.yaml \
helm install chat-service ./helm/chat-service \
--values ./helm/chat-service/values-prestaging.yaml \
--namespace omnipdf

# Deploy all services using deployment script
./scripts/deploy-helm-charts.sh --all --env prestaging

# Deploy RBAC only (13 individual service roles - should be deployed first)
# Deploy RBAC only (14 individual service roles - should be deployed first)
./scripts/deploy-helm-charts.sh --service rbac --env prestaging
```

Expand All @@ -77,8 +77,8 @@ helm install istio-gateway ./helm/istio-gateway \
helm install rbac ./helm/rbac \
--namespace omnipdf-prestaging

# 5. Deploy services with Istio sidecars
for service in frontend pdf-processor-service embedder-service chromadb redis minio cleaner pdf-extraction-service docling-translation-service pdf-renderer-service image-captioner-service metadata-service; do
# 5. Deploy services with Istio sidecars
for service in frontend pdf-processor-service chat-service embedder-service chromadb redis minio cleaner pdf-extraction-service docling-translation-service pdf-renderer-service image-captioner-service metadata-service; do
helm install $service ./helm/$service \
--namespace omnipdf-prestaging \
--values ./helm/$service/values-prestaging.yaml
Expand All @@ -99,9 +99,9 @@ OmniPDF implements **defense-in-depth security** with multiple layers:

### Service Account & RBAC
- **Individual service accounts** for each service with per-service secret isolation
- **13 individual RBAC roles** - one role per service aligned with C4 architecture:
- **14 individual RBAC roles** - one role per service aligned with C4 architecture:
- `pdf-processor-service-role`, `pdf-extraction-service-role`, `docling-translation-service-role`
- `embedder-service-role`, `pdf-renderer-service-role`
- `embedder-service-role`, `chat-service-role`, `pdf-renderer-service-role`
- `image-captioner-service-role`, `metadata-service-role`
- `minio-role`, `chromadb-role`, `redis-role`
- `frontend-role`, `nginx-gateway-role`, `cleaner-role`
Expand All @@ -119,17 +119,18 @@ OmniPDF implements comprehensive zero-trust network policies with explicit servi
| **nginx** | • External traffic (users) | • istio-gateway:80/443<br>• DNS resolution |
| **istio-gateway** | • nginx | • frontend:8501<br>• pdf-processor-service:8000<br>• DNS resolution |
| **frontend** | • istio-gateway | • pdf-processor-service:8000<br>• DNS resolution |
| **pdf-processor-service** | • istio-gateway<br>• frontend | • pdf-extraction-service:8000<br>• docling-translation-service:8000<br>• pdf-renderer-service:8000<br>• embedder-service:8000<br>• metadata-service:8000<br>• minio:9000<br>• redis:6379<br>• DNS resolution |
| **pdf-processor-service** | • istio-gateway<br>• frontend | • pdf-extraction-service:8000<br>• docling-translation-service:8000<br>• pdf-renderer-service:8000<br>• embedder-service:8000<br>• chat-service:8000<br>• metadata-service:8000<br>• minio:9000<br>• redis:6379<br>• DNS resolution |
| **pdf-extraction-service** | • pdf-processor-service | • image-captioner-service:8000<br>• minio:9000<br>• redis:6379<br>• DNS resolution |
| **docling-translation-service** | • pdf-processor-service | • minio:9000<br>• redis:6379<br>• DNS resolution<br>• HTTP/HTTPS (external vLLM text model) |
| **pdf-renderer-service** | • pdf-processor-service | • minio:9000<br>• redis:6379<br>• DNS resolution |
| **embedder-service** | • pdf-processor-service | • chromadb:8000<br>• minio:9000<br>• redis:6379<br>• DNS resolution |
| **chat-service** | • pdf-processor-service | • chromadb:8000<br>• minio:9000<br>• redis:6379<br>• DNS resolution<br>• HTTP/HTTPS (external vLLM text model) |
| **image-captioner-service** | • pdf-extraction-service | • DNS resolution<br>• HTTP/HTTPS (external vLLM vision model) |
| **metadata-service** | • pdf-processor-service | • chromadb:8000<br>• minio:9000<br>• redis:6379<br>• DNS resolution<br>• HTTP/HTTPS (external vLLM text model) |
| **cleaner** | *No ingress (background service)* | • minio:9000<br>• chromadb:8000<br>• redis:6379<br>• DNS resolution |
| **chromadb** | • embedder-service<br>• metadata-service<br>• cleaner | • DNS resolution<br>*No outbound calls* |
| **redis** | • pdf-processor-service<br>• pdf-extraction-service<br>• docling-translation-service<br>• embedder-service<br>• pdf-renderer-service<br>• metadata-service<br>• cleaner | • DNS resolution<br>*No outbound calls* |
| **minio** | • pdf-processor-service<br>• pdf-extraction-service<br>• docling-translation-service<br>• pdf-renderer-service<br>• embedder-service<br>• metadata-service<br>• cleaner | • DNS resolution<br>*No outbound calls* |
| **chromadb** | • embedder-service<br>• chat-service<br>• metadata-service<br>• cleaner | • DNS resolution<br>*No outbound calls* |
| **redis** | • pdf-processor-service<br>• pdf-extraction-service<br>• docling-translation-service<br>• embedder-service<br>• chat-service<br>• pdf-renderer-service<br>• metadata-service<br>• cleaner | • DNS resolution<br>*No outbound calls* |
| **minio** | • pdf-processor-service<br>• pdf-extraction-service<br>• docling-translation-service<br>• pdf-renderer-service<br>• embedder-service<br>• chat-service<br>• metadata-service<br>• cleaner | • DNS resolution<br>*No outbound calls* |

#### Network Policy Configuration

Expand All @@ -151,9 +152,9 @@ OmniPDF implements comprehensive zero-trust network policies with explicit servi
- **External Connectivity**: Managed external vLLM/AI API access through ServiceEntry (Istio) or HTTPS egress

### HPA (Horizontal Pod Autoscaler)
- **8 services** with auto-scaling enabled across 3 tiers:
- **Tier 1 (Critical)**: nginx, pdf-processor-service - aggressive scaling (60-70% thresholds)
- **Tier 2 (Processing)**: pdf-extraction, docling-translation, pdf-renderer - standard scaling (70% thresholds)
- **9 services** with auto-scaling enabled across 3 tiers:
- **Tier 1 (Critical)**: nginx, pdf-processor-service, chat-service - aggressive scaling (60-70% thresholds)
- **Tier 2 (Processing)**: pdf-extraction, docling-translation, pdf-renderer - standard scaling (70% thresholds)
- **Tier 3 (Burst)**: embedder-service, image-captioner-service, metadata-service - conservative scaling (70% thresholds)
- **High availability**: Minimum 1-2 replicas with scaling up to 5-15 replicas based on service tier
- **Resource optimization**: Proactive scaling for user-facing services, workload-responsive for processing services
Expand All @@ -162,13 +163,13 @@ OmniPDF implements comprehensive zero-trust network policies with explicit servi

```bash
# Enable NetworkPolicy for production
helm upgrade pdf-extraction-service ./helm/pdf-extraction-service \
helm upgrade chat-service ./helm/chat-service \
--set networkPolicy.enabled=true \
--namespace omnipdf

# Check service account permissions
kubectl auth can-i get secrets \
--as=system:serviceaccount:omnipdf:pdf-extraction-service \
--as=system:serviceaccount:omnipdf:chat-service \
-n omnipdf

# Monitor HPA status
Expand Down Expand Up @@ -238,11 +239,11 @@ crc config view
## Testing

```bash
# Run all service unit tests (180+ tests across 6 services)
# Run all service unit tests (206+ tests across 7 services)
./scripts/test-all-services.sh

# Run tests for individual service
./scripts/test-single-service.sh pdf-extraction-service
./scripts/test-single-service.sh chat-service

# Security scanning with Trivy
./scripts/scan_with_trivy.sh
Expand Down
10 changes: 8 additions & 2 deletions c4-diagram.puml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ System_Boundary(omnipdf, "OmniPDF System - Prestaging (CRC)") {
System_Boundary(istio_mesh, "Istio Service Mesh") {
Container(istio_gateway, "Istio Ingress Gateway", "Istio Gateway + Envoy", "Handles ingress traffic into the mesh with advanced routing policies")

Container(frontend, "Streamlit Frontend", "Python/Streamlit + Envoy Sidecar", "Web UI for document processing")
Container(frontend, "Streamlit Frontend", "Python/Streamlit + Envoy Sidecar", "Web UI for document processing and chat interface")

Container(pdf_service, "PDF Processor Service", "FastAPI + Envoy Sidecar", "Main orchestrator - manages sessions, coordinates processing workflows")

Expand All @@ -21,6 +21,7 @@ System_Boundary(omnipdf, "OmniPDF System - Prestaging (CRC)") {
Container(pdf_renderer, "PDF Renderer Service", "FastAPI + Envoy Sidecar", "Renders translated content onto PDFs")

Container(embedder, "Embedder Service", "FastAPI + Envoy Sidecar", "Document chunking and embedding generation")
Container(chat_service, "Chat Service", "FastAPI + Envoy Sidecar", "RAG chat interface using retrieved context")
Container(image_captioner, "Image Captioning Service", "FastAPI + Envoy Sidecar", "AI image captioning for extracted images")

Container(metadata_service, "Metadata Service", "FastAPI + Envoy Sidecar", "Document metadata and wordcloud generation")
Expand All @@ -33,7 +34,7 @@ System_Boundary(omnipdf, "OmniPDF System - Prestaging (CRC)") {
}

System_Boundary(external_ai, "External AI Infrastructure") {
Container(vllm_text, "vLLM Text Model", "vLLM Server", "External text-only LLM for translation and metadata")
' Shared text LLM: called by the translation, chat, and metadata services (see the Rel(..., vllm_text, ...) lines).
Container(vllm_text, "vLLM Text Model", "vLLM Server", "External text-only LLM for chat, translation, and metadata")
Container(vllm_vlm, "vLLM Vision-Language Model", "vLLM Server", "External multimodal VLM for image understanding")
}

Expand All @@ -55,6 +56,7 @@ Rel(pdf_service, pdf_extraction, "Delegates table and image extraction")
Rel(pdf_service, docling_translate, "Requests docling translation")
Rel(pdf_service, pdf_renderer, "Renders previews of extracted content")
Rel(pdf_service, embedder, "Sends chunking + embedding task")
Rel(pdf_service, chat_service, "RAG conversations")
Rel(pdf_service, metadata_service, "Requests metadata generation")

' File storage operations (mTLS within mesh)
Expand All @@ -64,17 +66,20 @@ Rel(pdf_renderer, minio, "Stores rendered PDFs (mTLS)")
Rel(metadata_service, minio, "Stores metadata job status (mTLS)")
Rel(docling_translate, minio, "Stores translated JSON + job status (mTLS)")
Rel(embedder, minio, "Job status storage (mTLS)")
Rel(chat_service, minio, "Future file operations (mTLS)")
Rel(pdf_extraction, image_captioner, "Requests image captioning")

' External AI/ML service communication (HTTP over VPN)
Rel(docling_translate, vllm_text, "Translation requests (HTTP)")
Rel(chat_service, vllm_text, "RAG chat requests (HTTP)")
Rel(metadata_service, vllm_text, "Metadata generation (HTTP)")

' External VLM communication (HTTP over VPN)
Rel(image_captioner, vllm_vlm, "Image captioning requests (HTTP)")

' Vector database operations (mTLS within mesh)
Rel(embedder, chroma, "Store embeddings (mTLS)")
Rel(chat_service, chroma, "Query vectors (mTLS)")
Rel(metadata_service, chroma, "Query vectors for metadata generation (mTLS)")

' Job status and session management (mTLS within mesh)
Expand All @@ -84,6 +89,7 @@ Rel(pdf_renderer, redis, "Document file list management (mTLS)")
Rel(embedder, redis, "Job status polling (mTLS)")
Rel(docling_translate, redis, "Document file list management (mTLS)")
Rel(metadata_service, redis, "Document file list management (mTLS)")
Rel(chat_service, redis, "Future session management (mTLS)")

' Cleanup operations (mTLS within mesh)
Rel(cleaner, minio, "Delete expired files (mTLS)")
Expand Down
36 changes: 36 additions & 0 deletions chat_service/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
FROM python:3.13-slim

WORKDIR /app

# Put both the service directory and the shared utilities on the import path
# so their modules can be imported as top-level names.
ENV PYTHONPATH=/app/chat_service:/app/shared_utils

RUN apt-get update && apt-get install -y build-essential && rm -rf /var/lib/apt/lists/*

# Copy requirements.txt from chat_service folder in root context
COPY chat_service/requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Copy shared_utils folder from root context into /app/shared_utils
COPY shared_utils ./shared_utils

# Copy chat_service folder from root context into /app/chat_service
COPY chat_service ./chat_service

# Remove unnecessary system packages
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get remove --purge -y linux-libc-dev && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create non-root user for security and set permissions
RUN groupadd -r appuser && useradd -r -g appuser -d /app -s /bin/bash appuser && \
    chown -R appuser:appuser /app

USER appuser

# Expose the FastAPI port
EXPOSE 8000

# Exec form: uvicorn is started directly (no wrapping shell), so it receives
# container signals itself. PYTHONPATH is deliberately NOT overridden here;
# the ENV above already lists both /app/chat_service and /app/shared_utils.
CMD ["uvicorn", "chat_service.main:app", "--host", "0.0.0.0", "--port", "8000"]
Comment on lines +5 to +36

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The CMD instruction is incorrectly overriding the PYTHONPATH environment variable, which will likely cause ImportError at runtime. Additionally, the Dockerfile can be optimized to reduce image size by combining RUN layers.

  1. CMD Bug: The ENV at line 5 correctly sets PYTHONPATH to include both chat_service and shared_utils. However, the CMD at line 36 re-defines PYTHONPATH for its execution scope but omits /app/shared_utils. This will prevent modules from shared_utils from being imported.

  2. Layer Optimization: The Dockerfile runs apt-get update twice (lines 7 and 21) and uses separate RUN commands to install and then remove packages. This creates unnecessary layers. These steps should be combined into a single RUN command to install build dependencies, use them to install Python packages, and then clean them up in the same layer.

I suggest refactoring the CMD to use the exec form and rely on the ENV variable, and consolidating the RUN commands for better efficiency and a smaller final image.

ENV PYTHONPATH=/app

RUN apt-get update && \
    apt-get install -y --no-install-recommends build-essential && \
    rm -rf /var/lib/apt/lists/*

# Copy requirements.txt from chat_service folder in root context
COPY chat_service/requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt && \
    apt-get purge -y --auto-remove build-essential && \
    rm -rf /var/lib/apt/lists/*

# Copy shared_utils folder from root context into /app/shared_utils
COPY shared_utils ./shared_utils

# Copy chat_service folder from root context into /app/chat_service
COPY chat_service ./chat_service

# Create non-root user for security and set permissions
RUN groupadd -r appuser && useradd -r -g appuser -d /app -s /bin/bash appuser && \
    chown -R appuser:appuser /app

USER appuser

# Expose the FastAPI port    
EXPOSE 8000

CMD ["uvicorn", "chat_service.main:app", "--host", "0.0.0.0", "--port", "8000"]

23 changes: 23 additions & 0 deletions chat_service/example.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# NOTE: keep comments on their own lines. Some env-file loaders only treat a
# leading '#' as a comment and would include trailing text in the value.

# Please change this to your LLM Server URL endpoint
OPENAI_BASE_URL=http://webworkdgx/vllm_qwen3coder/v1
OPENAI_API_KEY=lm-studio

# Large Language Model Configuration
OPENAI_MODEL=Qwen2.5-14B-Coder-Instruct
MODEL_TEMPERATURE=0.1
MODEL_MAX_TOKENS=2000
MODEL_TOP_K=5
MODEL_TOP_P=0.8
MODEL_FREQ_PENALTY=0.1
MODEL_PRESENCE_PENALTY=0.1

# Context Management
MODEL_MAX_CONTEXT=4000
MODEL_MIN_SIMILARITY=0.3
MODEL_ENABLE_RERANKING=true

# RAG Optimization
ENABLE_LLM_QUERY_CLASSIFICATION=true
ENABLE_RESPONSE_POST_PROCESSING=true

# Redis storage
REDIS_URL="redis://redis:6379/0?decode_responses=True&protocol=3"
19 changes: 19 additions & 0 deletions chat_service/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from fastapi import FastAPI
from routers import health, chat
from prometheus_fastapi_instrumentator import Instrumentator

import logging

# Configure root logging at import time: INFO level, timestamped records.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# The application is created with "/chat" as its root path.
app = FastAPI(root_path="/chat")

# Prometheus metrics: instrument the app, then expose the metrics endpoint on it.
instrumentator = Instrumentator()
instrumentator.instrument(app)
instrumentator.expose(app)

# Register the health and chat routers.
app.include_router(health.router)
app.include_router(chat.router)
Empty file.
25 changes: 25 additions & 0 deletions chat_service/models/chat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class ChatRequest(BaseModel):
    """
    Request model for chat API endpoints.

    ``message`` and ``session_id`` are required. ``doc_ids`` and
    ``collection_name`` are optional and fall back to their defaults.
    """

    message: str
    session_id: str
    # Optional so the annotation matches the None default and an explicit
    # null from a client validates instead of being rejected as "not a list".
    doc_ids: Optional[List[str]] = None
    collection_name: str = Field(
        default="default_collection", description="ChromaDB collection name"
    )


class ChatResponse(BaseModel):
    """
    Response model for chat API.

    ``response`` and ``metadata`` are required. ``relevant_chunks`` defaults
    to an empty list when omitted.
    """

    response: str
    # default_factory gives every instance its own fresh list.
    relevant_chunks: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Relevant chunks returned with the response",
    )
    # Still required: Ellipsis marks the field as having no default.
    metadata: Dict[str, Any] = Field(
        ..., description="Additional metadata about the RAG process"
    )
Loading