Skip to content

Commit e836502

Browse files
authored
Add KV cache configuration and conditional cache router registration (#280)
1 parent 91307fb commit e836502

6 files changed

Lines changed: 47 additions & 5 deletions

File tree

core/api.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -307,8 +307,9 @@ def _extract_provider(model_name: str) -> str:
307307
# Register logs router
308308
app.include_router(logs_router)
309309

310-
# Register cache router
311-
app.include_router(cache_router)
310+
# Register cache router (only if KV cache is enabled)
311+
if settings.KV_CACHE_ENABLED:
312+
app.include_router(cache_router)
312313

313314
# Register graph router
314315
app.include_router(graph_router)

core/config.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,9 @@ class Settings(BaseSettings):
118118
CACHE_CHUNK_MAX_BYTES: int = 10 * 1024 * 1024 * 1024
119119
CACHE_PATH: str = "./storage/cache"
120120

121+
# KV Cache configuration (LlamaCache for semantic caching)
122+
KV_CACHE_ENABLED: bool = False
123+
121124
# Vector store configuration
122125
VECTOR_STORE_PROVIDER: Literal["pgvector"]
123126
VECTOR_STORE_DATABASE_NAME: Optional[str] = None
@@ -475,4 +478,8 @@ def get_settings() -> Settings:
475478
)
476479
settings_dict["TURBOPUFFER_API_KEY"] = os.environ["TURBOPUFFER_API_KEY"]
477480

481+
# Load kv_cache config
482+
if "kv_cache" in config:
483+
settings_dict["KV_CACHE_ENABLED"] = config["kv_cache"].get("enabled", False)
484+
478485
return Settings(**settings_dict)

core/database/postgres_database.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -258,6 +258,27 @@ def __init__(
258258
f"max_overflow={max_overflow}, pool_recycle={pool_recycle}s"
259259
)
260260

261+
# Strip parameters that asyncpg doesn't accept as keyword arguments
262+
# These will raise "unexpected keyword argument" errors
263+
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
264+
265+
parsed = urlparse(uri)
266+
query_params = parse_qs(parsed.query)
267+
268+
# List of parameters that asyncpg doesn't accept
269+
incompatible_params = ["sslmode", "channel_binding"]
270+
removed_params = []
271+
272+
for param in incompatible_params:
273+
if param in query_params:
274+
query_params.pop(param, None)
275+
removed_params.append(param)
276+
277+
if removed_params:
278+
logger.debug(f"Removing parameters from PostgreSQL URI (not compatible with asyncpg): {removed_params}")
279+
parsed = parsed._replace(query=urlencode(query_params, doseq=True))
280+
uri = urlunparse(parsed)
281+
261282
# Create async engine with explicit pool settings
262283
self.engine = create_async_engine(
263284
uri,

core/services_init.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,12 +16,10 @@
1616
from pathlib import Path
1717
from typing import Optional
1818

19-
from core.cache.llama_cache_factory import LlamaCacheFactory
2019
from core.completion.litellm_completion import LiteLLMCompletionModel
2120
from core.config import get_settings
2221
from core.database.postgres_database import PostgresDatabase
2322
from core.embedding.colpali_api_embedding_model import ColpaliApiEmbeddingModel
24-
from core.embedding.colpali_embedding_model import ColpaliEmbeddingModel
2523
from core.embedding.litellm_embedding import LiteLLMEmbeddingModel
2624
from core.parser.morphik_parser import MorphikParser
2725
from core.reranker.flag_reranker import FlagReranker
@@ -118,7 +116,14 @@
118116
# Cache factory
119117
# ---------------------------------------------------------------------------
120118

121-
cache_factory = LlamaCacheFactory(Path(settings.STORAGE_PATH))
119+
cache_factory = None
120+
if settings.KV_CACHE_ENABLED:
121+
from core.cache.llama_cache_factory import LlamaCacheFactory
122+
123+
cache_factory = LlamaCacheFactory(Path(settings.STORAGE_PATH))
124+
logger.info("KV cache enabled - initialized LlamaCacheFactory")
125+
else:
126+
logger.info("KV cache disabled")
122127

123128
# ---------------------------------------------------------------------------
124129
# ColPali multi-vector support
@@ -138,6 +143,8 @@
138143
colpali_vector_store = None
139144
case "local":
140145
logger.info("Initializing ColPali in local mode")
146+
from core.embedding.colpali_embedding_model import ColpaliEmbeddingModel
147+
141148
colpali_embedding_model = ColpaliEmbeddingModel()
142149
# Choose multivector store implementation based on provider and dual ingestion setting
143150
if settings.ENABLE_DUAL_MULTIVECTOR_INGESTION:

morphik.docker.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,9 @@ provider = "pgvector"
128128
[multivector_store]
129129
provider = "postgres"
130130

131+
[kv_cache]
132+
enabled = false # Enable LlamaCache for semantic caching
133+
131134
[redis]
132135
url = "redis://redis:6379/0" # Docker service name
133136
host = "redis"

morphik.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,9 @@ provider = "pgvector"
134134
[multivector_store]
135135
provider = "postgres" # "postgres" or "morphik" for fast implementation
136136

137+
[kv_cache]
138+
enabled = false # Enable LlamaCache for semantic caching
139+
137140
[redis]
138141
url = "redis://localhost:6379/0" # Full Redis URL (takes precedence over host/port)
139142
host = "localhost"

0 commit comments

Comments
 (0)