matdev83 · matdev83 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/.kiro/specs/model-capabilities-catalog/design.md b/.kiro/specs/model-capabilities-catalog/design.md
@@ -149,8 +149,10 @@ internal/infra/modelcatalog/modelsdev/
 - `internal/infra/runtimebundle/build.go` - build concrete catalog source/cache adapters, core catalog runtime, override resolver, wrapped capability resolver, eligibility resolver, and closers.
 - `internal/infra/runtimebundle/built.go` - expose catalog diagnostics/status provider for HTTP wiring.
 - `internal/stdhttp/server.go` - mount catalog diagnostics path when configured.
-- `internal/core/diag` new files - add a catalog status handler or DTO adapter if diagnostics are not served directly from modelcatalog.
-- `docs/capability-catalogs.md` - update operator and maintainer rules for models.dev plus overrides.
+- `internal/stdhttp/catalog_status_handler.go` - GET JSON catalog status via `modelcatalog.BuildCatalogDiagnosticsJSON`.
+- `internal/core/diag/route_trace.go` (extended) - optional `RouteTraceCatalog` metadata on route trace entries (`RouteTraceEntry.Catalog`).
+- `internal/core/modelcatalog/ports.go` - `ActiveSnapshotProvider` seam so `CatalogResolver` reads the **current** `CatalogRuntime` snapshot on each resolve (a refresh affects subsequent decisions without the runtime bundle having to swap the resolver).
+- `docs/capability-catalogs.md` - operator and maintainer rules for models.dev plus overrides.
 
 ## System Flows
 

diff --git a/.kiro/specs/model-capabilities-catalog/spec.json b/.kiro/specs/model-capabilities-catalog/spec.json
@@ -1,22 +1,22 @@
 {
   "feature_name": "model-capabilities-catalog",
   "created_at": "2026-04-24T19:10:44Z",
-  "updated_at": "2026-04-24T20:14:00Z",
+  "updated_at": "2026-04-24T20:14:00Z",
   "language": "en",
-  "phase": "tasks-generated",
+  "phase": "tasks-generated",
   "approvals": {
     "requirements": {
-      "generated": true,
-      "approved": true
+      "generated": true,
+      "approved": true
     },
     "design": {
-      "generated": true,
-      "approved": true
+      "generated": true,
+      "approved": true
     },
     "tasks": {
-      "generated": true,
-      "approved": true
+      "generated": true,
+      "approved": true
     }
   },
-  "ready_for_implementation": false
+  "ready_for_implementation": true
 }
diff --git a/.kiro/specs/model-capabilities-catalog/tasks.md b/.kiro/specs/model-capabilities-catalog/tasks.md
@@ -1,44 +1,44 @@
 # Implementation Plan
 
-- [ ] 1. Establish catalog configuration and typed contracts
-- [ ] 1.1 Add operator configuration for catalog usage, updates, cache, source, diagnostics, and overrides
+- [x] 1. Establish catalog configuration and typed contracts
+- [x] 1.1 Add operator configuration for catalog usage, updates, cache, source, diagnostics, and overrides
   - Add typed settings for enabling catalog usage, enabling external updates, update timing, source location, local cache location, diagnostics path, model overrides, and backend/model overrides.
   - Validate invalid durations, malformed source locations, invalid diagnostics paths, and malformed override keys with field-specific errors.
   - Treat startup validation as the required path; if runtime reload support later covers this config, it must reuse the same validation outcome.
   - Done when valid sample configurations load and invalid catalog configurations fail before runtime build.
   - _Requirements: 2.1, 2.2, 2.3, 2.4, 2.6, 5.1, 5.2, 10.2_
   - _Boundary: ModelCatalogConfig_
-- [ ] 1.2 Define source-neutral model facts and diagnostic enums
+- [x] 1.2 Define source-neutral model facts and diagnostic enums
   - Add internal representations for tri-state capabilities, optional limits, fact source, match kind, snapshot metadata, and eligibility reasons.
   - Define consumer-owned snapshot source/cache ports used by the core catalog runtime, without HTTP or filesystem details.
   - Ensure unknown capability and unknown limit states are distinct from explicit unsupported values.
   - Done when core catalog unit tests can construct facts for catalog, model override, backend/model override, backend declaration, no-match, and ambiguous-match cases, and fake snapshot ports compile against the core-owned contracts.
   - _Requirements: 1.1, 1.2, 3.1, 3.2, 3.3, 3.6, 4.6, 9.7, 10.5_
   - _Boundary: ModelFacts, CatalogRuntime_
-- [ ] 1.3 Add sample configuration coverage for disabled-by-default behavior
+- [x] 1.3 Add sample configuration coverage for disabled-by-default behavior
   - Update the sample config with commented catalog settings and override examples without enabling external fetches by default.
   - Add drift coverage so the documented sample continues to parse with catalog settings present.
   - Done when the sample config validates with catalog disabled and includes clear examples for both override levels.
   - _Requirements: 2.1, 2.2, 2.5, 11.4_
   - _Boundary: ModelCatalogConfig_
   - _Depends: 1.1_
 
-- [ ] 2. Implement model matching and override precedence
-- [ ] 2.1 (P) Build deterministic model-name matching
+- [x] 2. Implement model matching and override precedence
+- [x] 2.1 (P) Build deterministic model-name matching
   - Match exact catalog model names before normalized prefix-stripped names.
   - Return explicit exact, non-exact, ambiguous, and no-match outcomes without rewriting the route model.
   - Done when table tests prove exact match, `amazon/claude-sonnet-4` style normalized match, multiple-match ambiguity, and no-match outcome classification.
   - _Requirements: 4.1, 4.2, 4.3, 4.4, 4.6_
   - _Boundary: Matcher_
   - _Depends: 1.2_
-- [ ] 2.2 (P) Implement administrator override resolution
+- [x] 2.2 (P) Implement administrator override resolution
   - Resolve backend/model overrides before model-only overrides.
   - Accept overrides for models that are not present in any catalog snapshot and mark them as operator-defined facts.
   - Done when tests prove pair override wins, model override falls back, unknown override is accepted, and source precedence is visible.
   - _Requirements: 5.1, 5.2, 5.3, 5.4, 5.7, 5.8_
   - _Boundary: OverrideResolver_
   - _Depends: 1.1, 1.2_
-- [ ] 2.3 Combine overrides, catalog matches, and backend declarations into effective facts
+- [x] 2.3 Combine overrides, catalog matches, and backend declarations into effective facts
   - Apply precedence: backend/model override, model override, catalog match, then no feature match.
   - Intersect matching model facts with backend adapter capabilities so adapter unsupported features remain unsupported.
   - Preserve backend-only behavior when no override or catalog match applies.
@@ -47,32 +47,32 @@
   - _Boundary: CatalogResolver_
   - _Depends: 2.1, 2.2_
 
-- [ ] 3. Implement models.dev ingestion and local snapshot lifecycle
-- [ ] 3.1 Parse and normalize the models.dev payload subset
+- [x] 3. Implement models.dev ingestion and local snapshot lifecycle
+- [x] 3.1 Parse and normalize the models.dev payload subset
   - Decode provider/model maps and normalize only runtime-relevant fields: modalities, tools, reasoning, structured outputs, and limits.
   - Treat sparse or missing fields as unknown unless they explicitly map to supported or unsupported facts.
   - Establish the validated snapshot wire shape consumed by local cache loading and saving.
   - Done when parser tests cover valid payloads, missing optional fields, unsupported schema, invalid JSON, and ignored pricing/provider metadata.
   - _Requirements: 1.6, 3.1, 3.2, 3.3, 3.6, 10.5_
   - _Boundary: ModelsDevSource_
   - _Depends: 1.2_
-- [ ] 3.2 Implement local snapshot cache load and atomic save
+- [x] 3.2 Implement local snapshot cache load and atomic save
   - Implement the filesystem-backed cache adapter for the core-owned snapshot cache port.
   - Load the latest valid local snapshot during startup and reject corrupt or unsupported cached data.
   - Save only validated snapshots with proxy-owned fetched timestamp and content hash metadata.
   - Done when tests prove valid load, corrupt cache rejection, atomic save/readback, and deterministic snapshot metadata.
   - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.7, 8.4, 8.5_
   - _Boundary: SnapshotStore_
   - _Depends: 3.1_
-- [ ] 3.3 Implement external fetch behavior with privacy safeguards
+- [x] 3.3 Implement external fetch behavior with privacy safeguards
   - Implement the HTTP-backed source adapter for the core-owned snapshot source port.
   - Fetch the configured catalog source through the shared runtime HTTP client only when external updates are enabled.
   - Ensure requests contain no prompts, tool payloads, session transcripts, or provider API keys.
   - Done when HTTP-stub tests prove successful fetch, HTTP failure classification, disabled external access, and absence of sensitive headers or payload content.
   - _Requirements: 1.3, 10.1, 10.2, 10.3, 10.4_
   - _Boundary: ModelsDevSource_
   - _Depends: 3.1_
-- [ ] 3.4 Build non-blocking refresh lifecycle and status tracking
+- [x] 3.4 Build non-blocking refresh lifecycle and status tracking
   - Implement refresh coordination in the core catalog runtime using the snapshot source/cache ports.
   - Start refresh work only when automatic updates are enabled and stop it cleanly during runtime shutdown.
   - Retry failed updates according to configured timing while retaining the latest valid local snapshot.
@@ -81,15 +81,15 @@
   - _Boundary: CatalogRuntime_
   - _Depends: 3.2, 3.3_
 
-- [ ] 4. Implement context-size estimation and catalog eligibility
-- [ ] 4.1 (P) Add conservative request-size estimation
+- [x] 4. Implement context-size estimation and catalog eligibility
+- [x] 4.1 (P) Add conservative request-size estimation
   - Estimate request size from available canonical request content and mark the estimate basis for diagnostics.
   - Return unavailable when a deterministic estimate cannot be produced, including unavailable session contribution.
   - Done when tests prove available estimates, unavailable estimates, and diagnostic basis values for representative text and tool-containing calls.
   - _Requirements: 7.1, 7.2, 7.5, 7.7_
   - _Boundary: SizeEstimator_
   - _Depends: 1.2_
-- [ ] 4.2 Apply context-limit eligibility without duplicating capability negotiation
+- [x] 4.2 Apply context-limit eligibility without duplicating capability negotiation
   - Consume already-resolved effective facts instead of raw backend capability declarations.
   - Exclude candidates only when matching admin/catalog facts provide a known context limit and an available estimate exceeds it.
   - Return eligible for no-match, ambiguous-match, missing limit, or unavailable estimate cases.
@@ -99,61 +99,61 @@
   - _Boundary: EligibilityResolver_
   - _Depends: 2.3, 4.1_
 
-- [ ] 5. Wire catalog facts into runtime execution
-- [ ] 5.1 Compose the catalog runtime in the standard runtime bundle
+- [x] 5. Wire catalog facts into runtime execution
+- [x] 5.1 Compose the catalog runtime in the standard runtime bundle
   - Build config-derived overrides, concrete source/cache adapters, core catalog runtime, optional local snapshot loading, optional refresh, and the effective catalog resolver during runtime composition.
   - Register refresh shutdown with existing runtime closers.
   - Wire the catalog resolver into the existing candidate-aware capability resolver path without changing backend plugin contracts.
   - Done when runtime bundle tests prove catalog disabled behavior, catalog enabled with cache behavior, refresh closer registration, resolver wiring, and invalid config rejection.
   - _Requirements: 1.1, 1.4, 2.1, 2.2, 2.4, 2.5, 10.2, 11.4_
   - _Boundary: RuntimeBundle_
   - _Depends: 1.1, 2.3, 3.2, 3.4, 4.2_
-- [ ] 5.2 Route effective capabilities through canonical negotiation
+- [x] 5.2 Route effective capabilities through canonical negotiation
   - Feed catalog/admin effective capabilities into existing capability negotiation for capability reject and downgrade decisions.
   - Preserve existing backend declarations when catalog usage is disabled or no override/catalog match applies.
   - Done when executor tests prove catalog-derived capability mismatch is rejected by canonical negotiation, downgradable behavior remains unchanged, and no-match uses backend-only capabilities.
   - _Requirements: 3.5, 6.2, 6.3, 6.5, 8.1, 8.2, 11.1, 11.2, 11.3, 11.4_
   - _Boundary: Executor Integration_
   - _Depends: 5.1_
-- [ ] 5.3 Apply context-limit exclusions in the existing candidate failover loop
+- [x] 5.3 Apply context-limit exclusions in the existing candidate failover loop
   - Exclude context-ineligible candidates before backend open using the existing route exclusion mechanism.
   - Preserve weighted routing and ordered failover among remaining compatible candidates.
   - Done when runtime tests prove incompatible first candidate is skipped, compatible fallback executes, all-context-excluded returns an explicit context-limit error, and no post-output switch is introduced.
   - _Requirements: 6.1, 6.6, 7.3, 7.6, 8.1, 8.2, 8.3_
   - _Boundary: Executor Integration_
   - _Depends: 5.2_
 
-- [ ] 6. Add diagnostics and operator visibility
-- [ ] 6.1 Expose catalog status diagnostics
+- [x] 6. Add diagnostics and operator visibility
+- [x] 6.1 Expose catalog status diagnostics
   - Add a protected diagnostics response for enabled, disabled, unavailable, stale, snapshot generation, fetched timestamp, and latest refresh failure category.
   - Ensure diagnostics redact source credentials and do not expose prompts, sessions, tool payloads, or provider API keys.
   - Done when HTTP tests prove protected access, disabled status, unavailable status, snapshot freshness, failure category, and redaction behavior.
   - _Requirements: 1.4, 1.7, 9.1, 9.2, 9.6, 10.4_
   - _Boundary: DiagnosticsProvider_
   - _Depends: 5.1_
-- [ ] 6.2 Record candidate match and exclusion diagnostics
+- [x] 6.2 Record candidate match and exclusion diagnostics
   - Surface exact, non-exact, ambiguous, no-match, effective fact source, estimate basis, and exclusion reason in existing routing diagnostics or attempt lineage.
   - Keep diagnostics compact and free of request content.
   - Done when tests prove non-exact match, ambiguity, source precedence, estimate basis, and exclusion reasons are visible when diagnostics are enabled.
   - _Requirements: 4.3, 4.6, 5.7, 7.7, 9.3, 9.4, 9.5, 9.7, 10.4_
   - _Boundary: DiagnosticsProvider_
   - _Depends: 5.3, 6.1_
 
-- [ ] 7. Add parser fuzzing, architecture guards, and end-to-end regressions
-- [ ] 7.1 Fuzz and harden catalog payload parsing
+- [x] 7. Add parser fuzzing, architecture guards, and end-to-end regressions
+- [x] 7.1 Fuzz and harden catalog payload parsing
   - Add fuzz coverage for malformed provider maps, sparse optional fields, oversized irrelevant fields, and invalid limit values.
   - Done when fuzz smoke runs against the parser without panics and all invalid inputs fail closed without replacing the active snapshot.
   - _Requirements: 1.3, 1.6, 3.3, 3.6, 10.5_
   - _Boundary: ModelsDevSource_
   - _Depends: 3.1_
-- [ ] 7.2 Add architecture boundary checks for catalog code
+- [x] 7.2 Add architecture boundary checks for catalog code
   - Verify core catalog code does not import provider SDKs, backend plugins, frontend plugins, or raw provider execution packages.
   - Verify models.dev raw schema types remain outside public `pkg/lipapi` contracts.
   - Done when architecture tests fail on forbidden imports and pass for the intended package boundaries.
   - _Requirements: 3.3, 10.5, 11.4_
   - _Boundary: Architecture Tests_
   - _Depends: 3.1, 5.1_
-- [ ] 7.3 Validate critical multi-candidate flows end to end
+- [x] 7.3 Validate critical multi-candidate flows end to end
   - Cover a multi-candidate request where catalog/admin facts reject one candidate through negotiation and route to another candidate.
   - Cover a multi-candidate request where no admin/catalog match applies and no feature-driven limiting occurs.
   - Cover a request where a catalog refresh happens during evaluation and only later candidate evaluations or later requests can observe the new snapshot.

diff --git a/Makefile b/Makefile
@@ -62,6 +62,7 @@ FUZZTIME ?= 500ms
 test-fuzz:
 	@echo "Fuzz smoke (FUZZTIME=$(FUZZTIME)) one target per line"
 	$(GO) test -fuzz=FuzzJSONRoundTrip$$ -fuzztime=$(FUZZTIME) -run=^$$ ./internal/testkit
+	$(GO) test -fuzz=FuzzParseSnapshot$$ -fuzztime=$(FUZZTIME) -run=^$$ ./internal/infra/modelcatalog/modelsdev
 	$(GO) test -fuzz=FuzzParseSelector$$ -fuzztime=$(FUZZTIME) -run=^$$ ./internal/core/routing
 	$(GO) test -fuzz=FuzzParseSelectorFromBytes$$ -fuzztime=$(FUZZTIME) -run=^$$ ./internal/core/routing
 	$(GO) test -fuzz=FuzzDecodeCreateRequest$$ -fuzztime=$(FUZZTIME) -run=^$$ ./internal/plugins/frontends/openairesponses

diff --git a/config/config.yaml b/config/config.yaml
@@ -119,6 +119,26 @@ routing:
 hooks:
   tool_reactor_error_policy: fail_open
 
+# Optional models.dev snapshot catalog (disabled by default). Operator guide: docs/capability-catalogs.md
+# model_catalog:
+#   enabled: false
+#   external_updates_enabled: false
+#   # Local JSON snapshot cache path (required when enabled is true, or when external_updates_enabled is true and refreshes need a cache).
+#   cache_path: ./data/model_catalog.json
+#   # Background refresh (only when external_updates_enabled: true):
+#   # update_interval: 1h
+#   # source_url: "https://example.com/models.json"   # HTTPS recommended; see doc "Trust and exposure"
+#   # fetch_timeout: 30s   # optional extra bound when fetch context has no deadline (defense in depth)
+#   # diagnostics_path: /debug/model_catalog   # must start with /; must not overlap diagnostics.* paths
+#   # model_overrides:
+#   #   - model: "gpt-4o"
+#   #     tools: true
+#   #     context_limit_tokens: 128000
+#   # backend_model_overrides:
+#   #   - backend: "openai-responses"
+#   #     model: "gpt-4o-mini"
+#   #     reasoning: false
+
 continuity:
   in_memory: true
   store: memory
@@ -152,6 +172,12 @@ continuity:
 #   require_workspace_id: false   # when true, reject turns with no resolved workspace id (secure-session BeginTurn)
 #   workspace_resolve_on_error: fail_open   # fail_closed: workspace resolver errors deny the request (Req 11.6)
 #   resume_token_bind_principal_only: false # when true, resume fingerprints use only principal id (stable across client hint drift)
+#   # Optional: process-local TTL cache for durable SQL stores (sqlite/postgres) that reduces repeated sessionExists /
+#   # transcript_enabled reads. An empty sql_query_cache_ttl disables caching (default). A short TTL trades brief
+#   # staleness (if transcript policy or session rows are mutated out-of-band) for fewer DB round trips; any admin
+#   # mutation APIs added in the future must invalidate this cache.
+#   # sql_query_cache_ttl: 30s
+#   # sql_query_cache_max_entries: 4096   # optional cap per logical cache; omit or zero uses 4096 when ttl is set
 
 plugins:
   frontends: