Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cmd/lipstd/testdata/dogfood-local-stub/inventory.golden.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@
"factory_kind": "nvidia",
"enabled": false
},
{
"id": "ollama",
"factory_kind": "ollama",
"enabled": false
},
{
"id": "ollama-cloud",
"factory_kind": "ollama-cloud",
"enabled": false
},
{
"id": "dogfood-local",
"factory_kind": "local-stub",
Expand Down
10 changes: 10 additions & 0 deletions cmd/lipstd/testdata/dogfood-local-stub/routes.golden.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@
"kind": "nvidia",
"enabled": false
},
{
"id": "ollama",
"kind": "ollama",
"enabled": false
},
{
"id": "ollama-cloud",
"kind": "ollama-cloud",
"enabled": false
},
{
"id": "dogfood-local",
"kind": "local-stub",
Expand Down
22 changes: 22 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,28 @@ plugins:
config: {}
# base_url: https://integrate.api.nvidia.com/v1 # default
# api_key: "" # or use NVIDIA_API_KEY / NVIDIA_API_KEY_N env vars
- id: ollama
enabled: false
config: {}
# base_url: http://localhost:11434/v1 # default local Ollama OpenAI-compatible root
# responses_api: auto # auto | enabled | disabled (auto probes /api/version for >= 0.13.3)
# api_key: "" # optional reverse-proxy auth; default installs use a dummy credential
# discovery:
# enabled: true
# local_models: true
# capabilities: true
# timeout: 15s
- id: ollama-cloud
enabled: false
config: {}
# base_url: http://localhost:11434/v1 # local Ollama app proxying cloud models
# responses_api: auto
# discovery:
# enabled: true
# cloud_models: true
# capabilities: true
# cloud_models_url: https://ollama.com/api/tags
# timeout: 15s
features:
- id: submit-noop
enabled: true
Expand Down
6 changes: 6 additions & 0 deletions config/examples/anthropic-stub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ plugins:
- id: nvidia
enabled: false
config: {}
- id: ollama
enabled: false
config: {}
- id: ollama-cloud
enabled: false
config: {}
- kind: local-stub
id: stub-anthropic
enabled: true
Expand Down
6 changes: 6 additions & 0 deletions config/examples/dogfood-local-stub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ plugins:
- id: nvidia
enabled: false
config: {}
- id: ollama
enabled: false
config: {}
- id: ollama-cloud
enabled: false
config: {}
- kind: local-stub
id: dogfood-local
enabled: true
Expand Down
6 changes: 6 additions & 0 deletions config/examples/gemini-stub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ plugins:
- id: nvidia
enabled: false
config: {}
- id: ollama
enabled: false
config: {}
- id: ollama-cloud
enabled: false
config: {}
- kind: local-stub
id: stub-gemini
enabled: true
Expand Down
6 changes: 6 additions & 0 deletions config/examples/openai-legacy-stub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ plugins:
- id: nvidia
enabled: false
config: {}
- id: ollama
enabled: false
config: {}
- id: ollama-cloud
enabled: false
config: {}
- kind: local-stub
id: stub-oal
enabled: true
Expand Down
6 changes: 6 additions & 0 deletions config/examples/openai-responses-stub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ plugins:
- id: nvidia
enabled: false
config: {}
- id: ollama
enabled: false
config: {}
- id: ollama-cloud
enabled: false
config: {}
- kind: local-stub
id: stub-oar
enabled: true
Expand Down
7 changes: 5 additions & 2 deletions docs/capability-catalogs.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@ Backend model inventory answers: "which configured backend instances expose cano
Operator rules:

- The active registry is immutable and process-local. Request-time lookup does not read files and does not call remote providers.
- Every enabled backend instance must expose a `pkg/lipsdk/modelinventory.Provider`. Third-party backend
plugins should load remote inventory from their provider API or expose a static file/inline inventory.
- Every enabled backend instance must expose a `pkg/lipsdk/modelinventory.Provider` and at least one
`execbackend.Backend.BackendPrefixes` entry. Prefixes are validated before discovery and must be unique
across backend connector kinds; multiple instances of the same kind may reuse that kind's prefix.
Canonical inventory ids must not use the qualifier form
`<backend-prefix>:<canonical-id>` (for example `ollama:google/gemma4`).
- At startup, the runtime loads `model_inventory.cache_path` first when set. A valid cache avoids an immediate remote model-list call.
- If no valid cache is available, startup calls each enabled backend inventory provider once. Startup fails only when no valid cache exists and discovery cannot produce a valid registry.
- Background refresh defaults to `1h` and has a minimum of `1h`. A failed refresh keeps the latest successful registry active.
Expand Down
7 changes: 6 additions & 1 deletion docs/plugin-authoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,12 @@ A plugin should reject invalid config at startup rather than fail during the fir

## Backend model inventory

Backend plugins must expose `execbackend.Backend.ModelInventory` with a `pkg/lipsdk/modelinventory.Provider`.
Backend plugins must expose `execbackend.Backend.ModelInventory` with a `pkg/lipsdk/modelinventory.Provider`
and at least one `execbackend.Backend.BackendPrefixes` entry. Prefixes must match the backend factory id
(for example `openai-responses`, `ollama`, `ollama-cloud`) and must be unique across backend connector
kinds at runtime. Multiple instances of the same connector kind may reuse that kind's prefix.
Canonical model IDs must use the `vendor/model` form; do not publish
inventory rows whose canonical id uses a backend prefix qualifier such as `ollama:google/gemma4`.
The core model registry uses this provider at startup and during background refresh to answer fast routing
lookups for canonical model IDs such as `openai/gpt-5`.

Expand Down
6 changes: 4 additions & 2 deletions internal/archtest/backend_lifecycle_contract_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ func TestOfficialBackendsHaveLifecycleContractTests(t *testing.T) {
root := repoRoot(t)
backendsDir := filepath.Join(root, "internal", "plugins", "backends")
lifecycleDelegatedToSharedAdapter := map[string]string{
"openrouter": "openaicompat",
"nvidia": "openaicompat",
"ollama": "openaicompat",
"ollama-cloud": "openaicompat",
"openrouter": "openaicompat",
"nvidia": "openaicompat",
}
skipDirs := map[string]struct{}{
"credpool": {}, "openaicaps": {}, "openaicred": {}, "streampeek": {}, "checkcfg": {},
Expand Down
4 changes: 4 additions & 0 deletions internal/core/execbackend/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ type Backend struct {
ResolveTransportCaps func(ctx context.Context, call lipapi.Call, cand routing.AttemptCandidate) lipapi.BackendTransportCaps
Open func(ctx context.Context, call lipapi.Call, cand routing.AttemptCandidate) (lipapi.ManagedEventStream, error)
ModelInventory modelinventory.Provider
// BackendPrefixes names this connector kind for model-inventory discovery. Prefixes may be
// shared by instances of the same backend kind, but different backend kinds must not claim the
// same prefix. Canonical model IDs must not use the qualifier form "<prefix>:<canonical-id>".
BackendPrefixes []string

BillingFinalizationSupported bool
FinalizeBilling func(ctx context.Context, in BillingFinalizationInput) (lipapi.Event, error)
Expand Down
150 changes: 150 additions & 0 deletions internal/core/modelregistry/prefix_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
package modelregistry_test

import (
"context"
"errors"
"strings"
"sync"
"testing"

"github.com/matdev83/go-llm-interactive-proxy/internal/core/modelregistry"
"github.com/matdev83/go-llm-interactive-proxy/pkg/lipsdk/modelinventory"
)

// TestBuild_allowsDuplicateBackendPrefixForSameKind verifies that two backend
// instances of the same kind may both declare that kind's prefix, and that the
// resulting registry contains the models from both instances.
func TestBuild_allowsDuplicateBackendPrefixForSameKind(t *testing.T) {
	t.Parallel()

	// Both instances share one kind and therefore one prefix.
	const kind = "openai-responses"

	primary := modelregistry.BackendInventory{
		BackendID:       "openai-primary",
		Kind:            kind,
		BackendPrefixes: []string{kind},
		Provider: modelinventory.StaticProvider{Models: []modelinventory.Model{
			{CanonicalID: "openai/gpt-4o", NativeID: "gpt-4o"},
		}},
	}
	fallback := modelregistry.BackendInventory{
		BackendID:       "openai-fallback",
		Kind:            kind,
		BackendPrefixes: []string{kind},
		Provider: modelinventory.StaticProvider{Models: []modelinventory.Model{
			{CanonicalID: "openai/gpt-4.1", NativeID: "gpt-4.1"},
		}},
	}

	registry, buildErr := modelregistry.Build(
		context.Background(),
		[]modelregistry.BackendInventory{primary, fallback},
	)
	if buildErr != nil {
		t.Fatalf("Build() error = %v", buildErr)
	}

	// One model per instance is expected.
	models := registry.All()
	if n := len(models); n != 2 {
		t.Fatalf("models len = %d, want 2", n)
	}
}

// TestBuild_rejectsDuplicateBackendPrefixBeforeLoadModels verifies that Build
// fails with ErrDuplicateBackendPrefix when two different backend kinds claim
// the same prefix, that the error names both backends and both kinds, and that
// neither provider's LoadModels was invoked.
func TestBuild_rejectsDuplicateBackendPrefixBeforeLoadModels(t *testing.T) {
	t.Parallel()

	// Each provider is primed with an error so that an unexpected load would
	// also surface as a different failure.
	providerA := &prefixCountingProvider{err: errors.New("first must not load")}
	providerB := &prefixCountingProvider{err: errors.New("second must not load")}

	inventories := []modelregistry.BackendInventory{
		{
			BackendID:       "backend-a",
			Kind:            "test-a",
			BackendPrefixes: []string{"shared"},
			Provider:        providerA,
		},
		{
			BackendID:       "backend-b",
			Kind:            "test-b",
			BackendPrefixes: []string{"shared"},
			Provider:        providerB,
		},
	}

	_, err := modelregistry.Build(context.Background(), inventories)
	if !errors.Is(err, modelregistry.ErrDuplicateBackendPrefix) {
		t.Fatalf("Build() error = %v, want ErrDuplicateBackendPrefix", err)
	}

	msg := err.Error()
	for _, id := range []string{"backend-a", "backend-b"} {
		if !strings.Contains(msg, id) {
			t.Fatalf("duplicate prefix error = %v, want both backend ids", err)
		}
	}
	for _, kind := range []string{"test-a", "test-b"} {
		if !strings.Contains(msg, kind) {
			t.Fatalf("duplicate prefix error = %v, want both backend kinds", err)
		}
	}

	if !(providerA.calls == 0 && providerB.calls == 0) {
		t.Fatalf("LoadModels calls = %d/%d, want 0/0", providerA.calls, providerB.calls)
	}
}

// TestBuild_rejectsMissingBackendPrefix verifies that Build returns
// ErrMissingBackendPrefix for a backend that declares no prefixes, and that
// the backend's provider is never asked to load models.
func TestBuild_rejectsMissingBackendPrefix(t *testing.T) {
	t.Parallel()

	counting := &prefixCountingProvider{
		models: []modelinventory.Model{
			{CanonicalID: "vendor/model", NativeID: "model"},
		},
	}

	// BackendPrefixes is deliberately left unset.
	noPrefix := modelregistry.BackendInventory{
		BackendID: "openai",
		Kind:      "openai-responses",
		Provider:  counting,
	}

	_, err := modelregistry.Build(context.Background(), []modelregistry.BackendInventory{noPrefix})
	if !errors.Is(err, modelregistry.ErrMissingBackendPrefix) {
		t.Fatalf("Build() error = %v, want ErrMissingBackendPrefix", err)
	}
	if n := counting.calls; n != 0 {
		t.Fatalf("LoadModels calls = %d, want 0", n)
	}
}

func TestBuild_rejectsQualifiedCanonicalIDWithRegisteredPrefix(t *testing.T) {
t.Parallel()

_, err := modelregistry.Build(context.Background(), []modelregistry.BackendInventory{{
BackendID: "ollama-local",
Kind: "ollama",
BackendPrefixes: []string{"ollama"},
Provider: modelinventory.StaticProvider{Models: []modelinventory.Model{
{CanonicalID: "ollama:google/gemma4", NativeID: "google/gemma4"},
}},
}})
if !errors.Is(err, modelregistry.ErrInvalidCanonicalID) {
t.Fatalf("Build() error = %v, want ErrInvalidCanonicalID", err)
}
}

// TestBuild_allowsSlashCanonicalWhenVendorMatchesRegisteredPrefix verifies that
// a "vendor/model" canonical id is accepted even when the vendor segment equals
// a registered backend prefix, and that the model can then be looked up.
func TestBuild_allowsSlashCanonicalWhenVendorMatchesRegisteredPrefix(t *testing.T) {
	t.Parallel()

	inventory := modelregistry.BackendInventory{
		BackendID:       "ollama-local",
		Kind:            "ollama",
		BackendPrefixes: []string{"ollama"},
		Provider: modelinventory.StaticProvider{Models: []modelinventory.Model{
			{CanonicalID: "ollama/llama3", NativeID: "llama3:latest"},
		}},
	}

	registry, buildErr := modelregistry.Build(context.Background(), []modelregistry.BackendInventory{inventory})
	if buildErr != nil {
		t.Fatalf("Build() error = %v", buildErr)
	}

	got, ok := registry.Lookup("ollama/llama3")
	// Evaluated left to right, so got[0] is only read when exactly one entry exists.
	matched := ok && len(got) == 1 && got[0].NativeID == "llama3:latest"
	if !matched {
		t.Fatalf("Lookup(ollama/llama3) = %+v, %v", got, ok)
	}
}

// prefixCountingProvider is a test double for a model inventory provider. It
// records how many times LoadModels is invoked and answers with either the
// configured error or a snapshot of the configured models.
type prefixCountingProvider struct {
	// err, when non-nil, is returned by LoadModels instead of a snapshot.
	err error
	// models is the inventory returned by LoadModels when err is nil.
	models []modelinventory.Model

	// mu is held for the duration of each LoadModels call.
	mu sync.Mutex
	// calls is the number of LoadModels invocations so far.
	calls int
}

// LoadModels increments the call counter and then returns the configured error
// if one is set. Otherwise it returns a snapshot marked as SourceRemote that
// holds a copy of the configured models. The context argument is ignored.
func (p *prefixCountingProvider) LoadModels(context.Context) (modelinventory.Snapshot, error) {
	p.mu.Lock()
	defer p.mu.Unlock()

	// Count the call before deciding the outcome so failures are counted too.
	p.calls++

	if failure := p.err; failure != nil {
		return modelinventory.Snapshot{}, failure
	}

	var snap modelinventory.Snapshot
	snap.Source = modelinventory.SourceRemote
	// Copy so callers cannot mutate the provider's own slice.
	snap.Models = append([]modelinventory.Model(nil), p.models...)
	return snap, nil
}
Loading
Loading