matdev83 · matdev83 · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026
diff --git a/cmd/lipstd/testdata/dogfood-local-stub/inventory.golden.json b/cmd/lipstd/testdata/dogfood-local-stub/inventory.golden.json
@@ -62,6 +62,16 @@
       "factory_kind": "nvidia",
       "enabled": false
     },
+    {
+      "id": "ollama",
+      "factory_kind": "ollama",
+      "enabled": false
+    },
+    {
+      "id": "ollama-cloud",
+      "factory_kind": "ollama-cloud",
+      "enabled": false
+    },
     {
       "id": "dogfood-local",
       "factory_kind": "local-stub",

diff --git a/cmd/lipstd/testdata/dogfood-local-stub/routes.golden.json b/cmd/lipstd/testdata/dogfood-local-stub/routes.golden.json
@@ -41,6 +41,16 @@
       "kind": "nvidia",
       "enabled": false
     },
+    {
+      "id": "ollama",
+      "kind": "ollama",
+      "enabled": false
+    },
+    {
+      "id": "ollama-cloud",
+      "kind": "ollama-cloud",
+      "enabled": false
+    },
     {
       "id": "dogfood-local",
       "kind": "local-stub",

diff --git a/config/config.yaml b/config/config.yaml
@@ -276,6 +276,28 @@ plugins:
       config: {}
       # base_url: https://integrate.api.nvidia.com/v1  # default
       # api_key: ""          # or use NVIDIA_API_KEY / NVIDIA_API_KEY_N env vars
+    - id: ollama
+      enabled: false
+      config: {}
+      # base_url: http://localhost:11434/v1  # default local Ollama OpenAI-compatible root
+      # responses_api: auto  # auto | enabled | disabled (auto probes /api/version for >= 0.13.3)
+      # api_key: ""          # optional reverse-proxy auth; default installs use a dummy credential
+      # discovery:
+      #   enabled: true
+      #   local_models: true
+      #   capabilities: true
+      #   timeout: 15s
+    - id: ollama-cloud
+      enabled: false
+      config: {}
+      # base_url: http://localhost:11434/v1  # local Ollama app proxying cloud models
+      # responses_api: auto
+      # discovery:
+      #   enabled: true
+      #   cloud_models: true
+      #   capabilities: true
+      #   cloud_models_url: https://ollama.com/api/tags
+      #   timeout: 15s
   features:
     - id: submit-noop
       enabled: true

diff --git a/config/examples/anthropic-stub.yaml b/config/examples/anthropic-stub.yaml
@@ -60,6 +60,12 @@ plugins:
     - id: nvidia
       enabled: false
       config: {}
+    - id: ollama
+      enabled: false
+      config: {}
+    - id: ollama-cloud
+      enabled: false
+      config: {}
     - kind: local-stub
       id: stub-anthropic
       enabled: true

diff --git a/config/examples/dogfood-local-stub.yaml b/config/examples/dogfood-local-stub.yaml
@@ -63,6 +63,12 @@ plugins:
     - id: nvidia
       enabled: false
       config: {}
+    - id: ollama
+      enabled: false
+      config: {}
+    - id: ollama-cloud
+      enabled: false
+      config: {}
     - kind: local-stub
       id: dogfood-local
       enabled: true

diff --git a/config/examples/gemini-stub.yaml b/config/examples/gemini-stub.yaml
@@ -60,6 +60,12 @@ plugins:
     - id: nvidia
       enabled: false
       config: {}
+    - id: ollama
+      enabled: false
+      config: {}
+    - id: ollama-cloud
+      enabled: false
+      config: {}
     - kind: local-stub
       id: stub-gemini
       enabled: true

diff --git a/config/examples/openai-legacy-stub.yaml b/config/examples/openai-legacy-stub.yaml
@@ -60,6 +60,12 @@ plugins:
     - id: nvidia
       enabled: false
       config: {}
+    - id: ollama
+      enabled: false
+      config: {}
+    - id: ollama-cloud
+      enabled: false
+      config: {}
     - kind: local-stub
       id: stub-oal
       enabled: true

diff --git a/config/examples/openai-responses-stub.yaml b/config/examples/openai-responses-stub.yaml
@@ -61,6 +61,12 @@ plugins:
     - id: nvidia
       enabled: false
       config: {}
+    - id: ollama
+      enabled: false
+      config: {}
+    - id: ollama-cloud
+      enabled: false
+      config: {}
     - kind: local-stub
       id: stub-oar
       enabled: true

diff --git a/docs/capability-catalogs.md b/docs/capability-catalogs.md
@@ -20,8 +20,11 @@ Backend model inventory answers: "which configured backend instances expose cano
 Operator rules:
 
 - The active registry is immutable and process-local. Request-time lookup does not read files and does not call remote providers.
-- Every enabled backend instance must expose a `pkg/lipsdk/modelinventory.Provider`. Third-party backend
-  plugins should load remote inventory from their provider API or expose a static file/inline inventory.
+- Every enabled backend instance must expose a `pkg/lipsdk/modelinventory.Provider` and at least one
+  `execbackend.Backend.BackendPrefixes` entry. Prefixes are validated before discovery and must be unique
+  across backend connector kinds; multiple instances of the same kind may reuse that kind's prefix.
+  Canonical inventory ids must not use the qualifier form
+  `<backend-prefix>:<canonical-id>` (for example `ollama:google/gemma4`).
 - At startup, the runtime loads `model_inventory.cache_path` first when set. A valid cache avoids an immediate remote model-list call.
 - If no valid cache is available, startup calls each enabled backend inventory provider once. Startup fails only when no valid cache exists and discovery cannot produce a valid registry.
 - Background refresh defaults to `1h` and has a minimum of `1h`. A failed refresh keeps the latest successful registry active.

diff --git a/docs/plugin-authoring.md b/docs/plugin-authoring.md
@@ -61,7 +61,12 @@ A plugin should reject invalid config at startup rather than fail during the fir
 
 ## Backend model inventory
 
-Backend plugins must expose `execbackend.Backend.ModelInventory` with a `pkg/lipsdk/modelinventory.Provider`.
+Backend plugins must expose `execbackend.Backend.ModelInventory` with a `pkg/lipsdk/modelinventory.Provider`
+and at least one `execbackend.Backend.BackendPrefixes` entry. Prefixes must match the backend factory id
+(for example `openai-responses`, `ollama`, `ollama-cloud`) and must be unique across backend connector
+kinds at runtime. Multiple instances of the same connector kind may reuse that kind's prefix.
+Canonical model IDs must use the `vendor/model` form; do not publish
+inventory rows whose canonical ID uses a backend-prefix qualifier such as `ollama:google/gemma4`.
 The core model registry uses this provider at startup and during background refresh to answer fast routing
 lookups for canonical model IDs such as `openai/gpt-5`.
 

diff --git a/internal/archtest/backend_lifecycle_contract_test.go b/internal/archtest/backend_lifecycle_contract_test.go
@@ -12,8 +12,10 @@ func TestOfficialBackendsHaveLifecycleContractTests(t *testing.T) {
 	root := repoRoot(t)
 	backendsDir := filepath.Join(root, "internal", "plugins", "backends")
 	lifecycleDelegatedToSharedAdapter := map[string]string{
-		"openrouter": "openaicompat",
-		"nvidia":     "openaicompat",
+		"ollama":       "openaicompat",
+		"ollama-cloud": "openaicompat",
+		"openrouter":   "openaicompat",
+		"nvidia":       "openaicompat",
 	}
 	skipDirs := map[string]struct{}{
 		"credpool": {}, "openaicaps": {}, "openaicred": {}, "streampeek": {}, "checkcfg": {},

diff --git a/internal/core/execbackend/backend.go b/internal/core/execbackend/backend.go
@@ -24,6 +24,10 @@ type Backend struct {
 	ResolveTransportCaps func(ctx context.Context, call lipapi.Call, cand routing.AttemptCandidate) lipapi.BackendTransportCaps
 	Open                 func(ctx context.Context, call lipapi.Call, cand routing.AttemptCandidate) (lipapi.ManagedEventStream, error)
 	ModelInventory       modelinventory.Provider
+	// BackendPrefixes names this connector kind for model-inventory discovery. Prefixes may be
+	// shared by instances of the same backend kind, but different backend kinds must not claim the
+	// same prefix. Canonical model IDs must not use the qualifier form "<prefix>:<canonical-id>".
+	BackendPrefixes []string
 
 	BillingFinalizationSupported bool
 	FinalizeBilling              func(ctx context.Context, in BillingFinalizationInput) (lipapi.Event, error)

diff --git a/internal/core/modelregistry/prefix_test.go b/internal/core/modelregistry/prefix_test.go
@@ -0,0 +1,150 @@
+package modelregistry_test
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"sync"
+	"testing"
+
+	"github.com/matdev83/go-llm-interactive-proxy/internal/core/modelregistry"
+	"github.com/matdev83/go-llm-interactive-proxy/pkg/lipsdk/modelinventory"
+)
+
+// TestBuild_allowsDuplicateBackendPrefixForSameKind verifies that Build accepts
+// two backend instances of the same Kind that declare the same prefix, and that
+// the resulting registry reports two models.
+func TestBuild_allowsDuplicateBackendPrefixForSameKind(t *testing.T) {
+	t.Parallel()
+
+	// Both instances share Kind and prefix; only the backend id and model differ.
+	primary := modelregistry.BackendInventory{
+		BackendID:       "openai-primary",
+		Kind:            "openai-responses",
+		BackendPrefixes: []string{"openai-responses"},
+		Provider: modelinventory.StaticProvider{Models: []modelinventory.Model{
+			{CanonicalID: "openai/gpt-4o", NativeID: "gpt-4o"},
+		}},
+	}
+	fallback := modelregistry.BackendInventory{
+		BackendID:       "openai-fallback",
+		Kind:            "openai-responses",
+		BackendPrefixes: []string{"openai-responses"},
+		Provider: modelinventory.StaticProvider{Models: []modelinventory.Model{
+			{CanonicalID: "openai/gpt-4.1", NativeID: "gpt-4.1"},
+		}},
+	}
+	inventories := []modelregistry.BackendInventory{primary, fallback}
+
+	registry, buildErr := modelregistry.Build(context.Background(), inventories)
+	if buildErr != nil {
+		t.Fatalf("Build() error = %v", buildErr)
+	}
+
+	models := registry.All()
+	if len(models) != 2 {
+		t.Fatalf("models len = %d, want 2", len(models))
+	}
+}
+
+// TestBuild_rejectsDuplicateBackendPrefixBeforeLoadModels verifies that Build
+// returns ErrDuplicateBackendPrefix when two backends of different kinds claim
+// the same prefix, that the error text names both backend ids and both kinds,
+// and that neither provider's LoadModels was called.
+func TestBuild_rejectsDuplicateBackendPrefixBeforeLoadModels(t *testing.T) {
+	t.Parallel()
+
+	// Each provider is primed with an error, so any LoadModels call would also
+	// surface as a different failure than the one asserted below.
+	providerA := &prefixCountingProvider{err: errors.New("first must not load")}
+	providerB := &prefixCountingProvider{err: errors.New("second must not load")}
+	inventories := []modelregistry.BackendInventory{
+		{
+			BackendID:       "backend-a",
+			Kind:            "test-a",
+			BackendPrefixes: []string{"shared"},
+			Provider:        providerA,
+		},
+		{
+			BackendID:       "backend-b",
+			Kind:            "test-b",
+			BackendPrefixes: []string{"shared"},
+			Provider:        providerB,
+		},
+	}
+
+	_, buildErr := modelregistry.Build(context.Background(), inventories)
+	if !errors.Is(buildErr, modelregistry.ErrDuplicateBackendPrefix) {
+		t.Fatalf("Build() error = %v, want ErrDuplicateBackendPrefix", buildErr)
+	}
+
+	// The message must identify both conflicting backends by id and by kind.
+	msg := buildErr.Error()
+	switch {
+	case !strings.Contains(msg, "backend-a"), !strings.Contains(msg, "backend-b"):
+		t.Fatalf("duplicate prefix error = %v, want both backend ids", buildErr)
+	case !strings.Contains(msg, "test-a"), !strings.Contains(msg, "test-b"):
+		t.Fatalf("duplicate prefix error = %v, want both backend kinds", buildErr)
+	}
+
+	callsA, callsB := providerA.calls, providerB.calls
+	if callsA != 0 || callsB != 0 {
+		t.Fatalf("LoadModels calls = %d/%d, want 0/0", callsA, callsB)
+	}
+}
+
+// TestBuild_rejectsMissingBackendPrefix verifies that Build returns
+// ErrMissingBackendPrefix for a backend inventory with no BackendPrefixes, and
+// that the provider's LoadModels was not called.
+func TestBuild_rejectsMissingBackendPrefix(t *testing.T) {
+	t.Parallel()
+
+	models := []modelinventory.Model{{
+		CanonicalID: "vendor/model",
+		NativeID:    "model",
+	}}
+	provider := &prefixCountingProvider{models: models}
+
+	// BackendPrefixes is left unset on purpose; that is the condition under test.
+	inventory := modelregistry.BackendInventory{
+		BackendID: "openai",
+		Kind:      "openai-responses",
+		Provider:  provider,
+	}
+
+	_, buildErr := modelregistry.Build(context.Background(), []modelregistry.BackendInventory{inventory})
+	if !errors.Is(buildErr, modelregistry.ErrMissingBackendPrefix) {
+		t.Fatalf("Build() error = %v, want ErrMissingBackendPrefix", buildErr)
+	}
+	if n := provider.calls; n != 0 {
+		t.Fatalf("LoadModels calls = %d, want 0", n)
+	}
+}
+
+// TestBuild_rejectsQualifiedCanonicalIDWithRegisteredPrefix verifies that Build
+// returns ErrInvalidCanonicalID when a model's canonical id has the form
+// "<prefix>:<canonical-id>" and the prefix is one the backend declares.
+func TestBuild_rejectsQualifiedCanonicalIDWithRegisteredPrefix(t *testing.T) {
+	t.Parallel()
+
+	// The canonical id starts with "ollama:", matching the declared prefix.
+	qualified := modelinventory.Model{CanonicalID: "ollama:google/gemma4", NativeID: "google/gemma4"}
+	inventory := modelregistry.BackendInventory{
+		BackendID:       "ollama-local",
+		Kind:            "ollama",
+		BackendPrefixes: []string{"ollama"},
+		Provider:        modelinventory.StaticProvider{Models: []modelinventory.Model{qualified}},
+	}
+
+	_, buildErr := modelregistry.Build(context.Background(), []modelregistry.BackendInventory{inventory})
+	if !errors.Is(buildErr, modelregistry.ErrInvalidCanonicalID) {
+		t.Fatalf("Build() error = %v, want ErrInvalidCanonicalID", buildErr)
+	}
+}
+
+// TestBuild_allowsSlashCanonicalWhenVendorMatchesRegisteredPrefix verifies that
+// a slash-form canonical id whose vendor segment equals a declared prefix
+// ("ollama/llama3" with prefix "ollama") is accepted by Build and can be looked
+// up afterwards with its native id intact.
+func TestBuild_allowsSlashCanonicalWhenVendorMatchesRegisteredPrefix(t *testing.T) {
+	t.Parallel()
+
+	slashForm := modelinventory.Model{CanonicalID: "ollama/llama3", NativeID: "llama3:latest"}
+	inventory := modelregistry.BackendInventory{
+		BackendID:       "ollama-local",
+		Kind:            "ollama",
+		BackendPrefixes: []string{"ollama"},
+		Provider:        modelinventory.StaticProvider{Models: []modelinventory.Model{slashForm}},
+	}
+
+	registry, buildErr := modelregistry.Build(context.Background(), []modelregistry.BackendInventory{inventory})
+	if buildErr != nil {
+		t.Fatalf("Build() error = %v", buildErr)
+	}
+
+	// Exactly one entry is expected, carrying the native id supplied above.
+	entries, found := registry.Lookup("ollama/llama3")
+	matches := found && len(entries) == 1 && entries[0].NativeID == "llama3:latest"
+	if !matches {
+		t.Fatalf("Lookup(ollama/llama3) = %+v, %v", entries, found)
+	}
+}
+
+// prefixCountingProvider is a test double for a model inventory provider that
+// counts LoadModels invocations and returns either a preset error or a preset
+// list of models.
+type prefixCountingProvider struct {
+	mu     sync.Mutex             // held by LoadModels while it updates calls
+	calls  int                    // number of LoadModels invocations so far
+	err    error                  // when non-nil, returned by every LoadModels call
+	models []modelinventory.Model // models copied into each successful snapshot
+}
+
+// LoadModels records the invocation and then returns the configured error, if
+// any. Otherwise it returns a snapshot marked SourceRemote holding a copy of
+// the configured models.
+func (p *prefixCountingProvider) LoadModels(context.Context) (modelinventory.Snapshot, error) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	p.calls++
+	if failure := p.err; failure != nil {
+		return modelinventory.Snapshot{}, failure
+	}
+
+	// Copy so the returned snapshot does not alias the provider's own slice.
+	copied := append([]modelinventory.Model(nil), p.models...)
+	snapshot := modelinventory.Snapshot{
+		Source: modelinventory.SourceRemote,
+		Models: copied,
+	}
+	return snapshot, nil
+}