Skip to content
Open
243 changes: 243 additions & 0 deletions aisuite/providers/foundry_local_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
import importlib
import os

from aisuite.provider import LLMError
from aisuite.providers.openai_provider import OpenaiProvider

# Application name handed to the 1.x SDK ``Configuration`` (as ``app_name``)
# when this provider has to initialize the Foundry Local manager itself.
_APP_NAME = "aisuite"


def _normalize_endpoint(base_url: str) -> str:
    """Return ``base_url`` as an OpenAI-compatible ``/v1`` endpoint.

    Trailing slashes are dropped first, then ``/v1`` is appended unless the
    URL already ends with it, so applying the function twice yields the same
    result as applying it once.
    """
    trimmed = base_url.rstrip("/")
    return trimmed if trimmed.endswith("/v1") else trimmed + "/v1"


def _import_new_sdk():
    """Look up the 1.x Foundry Local SDK (``foundry_local_sdk`` package).

    Returns:
        The pair ``(Configuration, FoundryLocalManager)`` taken from the
        module, or ``None`` when importing it raises ``ImportError``.
    """
    try:
        sdk = importlib.import_module("foundry_local_sdk")
        exports = (sdk.Configuration, sdk.FoundryLocalManager)
    except ImportError:
        exports = None
    return exports


def _import_legacy_sdk():
    """Look up the legacy 0.x Foundry Local SDK (``foundry_local`` package).

    Returns:
        The module's ``FoundryLocalManager`` attribute, or ``None`` when
        importing the package raises ``ImportError``.
    """
    try:
        return importlib.import_module("foundry_local").FoundryLocalManager
    except ImportError:
        return None


class FoundryLocalProvider(OpenaiProvider):
    """
    Foundry Local provider for Microsoft's `Foundry Local
    <https://learn.microsoft.com/azure/ai-foundry/foundry-local/>`_ on-device runtime.

    Foundry Local exposes an OpenAI-compatible API, so reusing the OpenAI SDK
    means tool calls, tool-result messages, and finish_reason flow through
    unchanged. There are two ways to use it:

    * Managed (default): the Foundry Local Python SDK starts the local service,
      downloads and loads the requested model, and resolves the model alias to
      the concrete model id served by the OpenAI-compatible endpoint. Both the
      current ``foundry-local-sdk`` (1.x, imported as ``foundry_local_sdk``) and
      the legacy 0.x package (imported as ``foundry_local``) are supported.
      Install the variant that matches your hardware, e.g.
      ``pip install foundry-local-sdk``. Because Foundry Local picks a dynamic
      port, the SDK is the easiest way to discover the endpoint.
    * Explicit endpoint: set ``api_url``/``base_url`` (or the
      ``FOUNDRY_LOCAL_API_URL`` environment variable) to an already-running
      Foundry Local OpenAI-compatible endpoint. In this mode the SDK is not
      required. A friendly model alias is resolved to the concrete served id by
      querying the endpoint's ``/v1/models`` (an already-concrete id is passed
      through unchanged), so the same ``foundry_local:<alias>`` string works in
      both modes.
    """

    # Environment variable consulted for the endpoint when neither ``base_url``
    # nor ``api_url`` is present in the provider config.
    _ENV_API_URL = "FOUNDRY_LOCAL_API_URL"

    def __init__(self, **config):
        """Set the provider up in explicit-endpoint or managed mode.

        Args:
            **config: Provider configuration. ``base_url`` (preferred) or
                ``api_url`` selects explicit-endpoint mode; when both are
                absent the ``FOUNDRY_LOCAL_API_URL`` environment variable is
                used. All remaining keys are forwarded to
                ``OpenaiProvider.__init__`` (immediately in explicit mode, at
                bootstrap time in managed mode).
        """
        # Pop both spellings unconditionally. A short-circuiting ``or`` chain
        # would leave ``api_url`` inside ``config`` whenever ``base_url`` is
        # also supplied, and it would then reach ``OpenaiProvider.__init__``
        # as a stray keyword argument.
        explicit_base_url = config.pop("base_url", None)
        explicit_api_url = config.pop("api_url", None)
        base_url = (
            explicit_base_url or explicit_api_url or os.getenv(self._ENV_API_URL)
        )
        # Cache of requested model string -> concrete model id.
        self._model_ids = {}

        if base_url:
            # Talk directly to an already-running Foundry Local endpoint.
            config["base_url"] = _normalize_endpoint(base_url)
            # Foundry Local ignores the API key, but the OpenAI SDK requires one.
            config.setdefault("api_key", "foundry")
            self._managed = False
            super().__init__(**config)
        else:
            # Defer client creation until the first request, when the model
            # alias is known and the SDK can start the service and load it.
            self._managed = True
            self._config = config
            self._manager = None
            self._backend = None
            self._bootstrapped = False
            self.audio = None

    def chat_completions_create(self, model, messages, **kwargs):
        """Resolve ``model`` to a concrete id, then delegate to the parent.

        In managed mode the first call also bootstraps the local service.
        """
        if self._managed:
            model = self._ensure_managed_model(model)
        else:
            model = self._resolve_explicit_model(model)
        return super().chat_completions_create(model, messages, **kwargs)

    def _resolve_explicit_model(self, model):
        """In explicit-endpoint mode, resolve a friendly alias (e.g.
        ``phi-3.5-mini``) to the concrete model id the endpoint serves, so the
        same ``foundry_local:<alias>`` string works in both managed and explicit
        modes. Resolution queries the endpoint's ``/v1/models`` and matches, in
        order, an exact id, a ``parent`` alias, then an id prefixed by the alias
        (compared case-insensitively).

        Falls back to ``model`` unchanged when the lookup fails or there is no
        confident match (so an already-concrete id keeps working). A failed
        lookup is not cached, so the next request tries again.

        Raises:
            LLMError: If the alias matches more than one served model.
        """
        if model in self._model_ids:
            return self._model_ids[model]

        try:
            served = list(self.client.models.list())
        except Exception:
            # Endpoint unreachable or doesn't implement /v1/models: don't block
            # the request, just forward the model string unchanged. Deliberately
            # skip the cache so a transient failure does not disable alias
            # resolution for the lifetime of this provider.
            return model

        resolved = self._match_served_model(model, served) if served else model
        self._model_ids[model] = resolved
        return resolved

    @staticmethod
    def _served_parent(served_model):
        """Return the ``parent`` alias a served model was derived from, if the
        endpoint advertises one (it may be a model attribute or an extra field).
        Returns ``None`` when neither location provides a value."""
        parent = getattr(served_model, "parent", None)
        if parent is None:
            extra = getattr(served_model, "model_extra", None)
            if extra:
                parent = extra.get("parent")
        return parent

    def _match_served_model(self, alias, served):
        """Pick the served model id that ``alias`` refers to.

        Args:
            alias: Model string requested by the caller.
            served: Non-empty sequence of model objects exposing ``id``.

        Returns:
            The matching concrete id, or ``alias`` unchanged when nothing
            matches confidently.

        Raises:
            LLMError: If several served models match by parent or by prefix.
        """
        ids = [m.id for m in served]
        # 1. Already a concrete served id.
        if alias in ids:
            return alias
        # 2. A model the endpoint reports as derived from this alias.
        by_parent = [m.id for m in served if self._served_parent(m) == alias]
        if len(by_parent) == 1:
            return by_parent[0]
        if len(by_parent) > 1:
            raise LLMError(self._ambiguous_alias_message(alias, by_parent))
        # 3. A served id whose alias-boundary prefix matches (e.g.
        #    "phi-3.5-mini" -> "Phi-3.5-mini-instruct-generic-cpu"). Aliases
        #    are written lowercase while served ids may be capitalised, so the
        #    comparison ignores case.
        prefix = f"{alias}-".casefold()
        by_prefix = [
            model_id for model_id in ids if model_id.casefold().startswith(prefix)
        ]
        if len(by_prefix) == 1:
            return by_prefix[0]
        if len(by_prefix) > 1:
            raise LLMError(self._ambiguous_alias_message(alias, by_prefix))
        # 4. No confident match: forward unchanged and let the endpoint decide.
        return alias

    @staticmethod
    def _ambiguous_alias_message(alias, candidates):
        """Build the error text for an alias that matches several served ids
        (candidates are listed in sorted order)."""
        return (
            f"Foundry Local alias '{alias}' matches multiple models served by the "
            f"endpoint: {', '.join(sorted(candidates))}. Pass one of these concrete "
            "model ids instead."
        )

    def _ensure_managed_model(self, alias):
        """Bootstrap the Foundry Local service for ``alias`` on first use and
        return the concrete model id the OpenAI-compatible endpoint expects.
        Aliases first seen after bootstrap are loaded on the running service."""
        if not self._bootstrapped:
            self._bootstrap(alias)
        elif alias not in self._model_ids:
            self._load_additional_model(alias)
        return self._model_ids[alias]

    def _bootstrap(self, alias):
        """Start the service through whichever SDK is installed, preferring the
        1.x package over the legacy 0.x one.

        Raises:
            LLMError: If neither SDK package can be imported.
        """
        new_sdk = _import_new_sdk()
        if new_sdk is not None:
            self._backend = "new"
            self._bootstrap_new(new_sdk, alias)
        else:
            legacy_manager = _import_legacy_sdk()
            if legacy_manager is None:
                raise LLMError(
                    "Foundry Local SDK is not installed. Install it with "
                    "`pip install foundry-local-sdk`, or set api_url/base_url "
                    "(or the FOUNDRY_LOCAL_API_URL environment variable) to point "
                    "at a running Foundry Local endpoint."
                )
            self._backend = "legacy"
            self._bootstrap_legacy(legacy_manager, alias)
        # Only flag success once the chosen backend finished without raising,
        # so a failed bootstrap is retried on the next request.
        self._bootstrapped = True

    def _bootstrap_new(self, new_sdk, alias):
        """Bootstrap using the 1.x ``foundry_local_sdk`` singleton API: start the
        OpenAI-compatible web service and point the OpenAI client at it.

        Args:
            new_sdk: The ``(Configuration, FoundryLocalManager)`` pair returned
                by ``_import_new_sdk``.
            alias: Model alias to download and load.
        """
        configuration_cls, manager_cls = new_sdk
        # Reuse an already-initialized singleton; only initialize it ourselves
        # when nobody has done so yet.
        if manager_cls.instance is None:
            manager_cls.initialize(configuration_cls(app_name=_APP_NAME))
        self._manager = manager_cls.instance
        # Make the execution providers available before loading any model.
        self._manager.download_and_register_eps()
        self._model_ids[alias] = self._load_new_model(alias)
        if getattr(self._manager, "urls", None) is None:
            self._manager.start_web_service()

        # Build the client config from a copy so the deferred user config
        # stored in ``self._config`` stays untouched.
        config = dict(self._config)
        config["base_url"] = _normalize_endpoint(self._manager.urls[0])
        config.setdefault("api_key", "foundry")
        super().__init__(**config)

    def _load_new_model(self, alias):
        """Download and load ``alias`` via the 1.x catalog and return its id.

        Raises:
            LLMError: If the catalog has no model for ``alias``.
        """
        imodel = self._manager.catalog.get_model(alias)
        if imodel is None:
            raise LLMError(
                f"Foundry Local model '{alias}' was not found in the catalog."
            )
        imodel.download()
        imodel.load()
        return imodel.id

    def _bootstrap_legacy(self, manager_cls, alias):
        """Bootstrap using the legacy 0.x ``foundry_local`` API, where the
        manager starts the service and loads the model on construction."""
        self._manager = manager_cls(alias)
        info = self._manager.get_model_info(alias)
        # Fall back to the alias itself when the manager has no info for it.
        self._model_ids[alias] = info.id if info is not None else alias

        config = dict(self._config)
        config["base_url"] = _normalize_endpoint(self._manager.endpoint)
        config.setdefault("api_key", self._manager.api_key or "foundry")
        super().__init__(**config)

    def _load_additional_model(self, alias):
        """Download and load a second alias on the already-running service,
        using the backend chosen during bootstrap."""
        if self._backend == "new":
            self._model_ids[alias] = self._load_new_model(alias)
        else:
            self._manager.download_model(alias)
            self._manager.load_model(alias)
            info = self._manager.get_model_info(alias)
            self._model_ids[alias] = info.id if info is not None else alias


# The provider factory derives the class name from the provider key
# ("foundry_local" -> "Foundry_localProvider" via str.capitalize), so keep that
# name available as an alias of the product-named class. Both names refer to
# the same class object.
Foundry_localProvider = FoundryLocalProvider
3 changes: 2 additions & 1 deletion guides/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ Here are the instructions for:
- [xAI](xai.md)
- [DeepSeek](deepseek.md)

For locally hosted models using `Ollama` or `LM Studio`, follow these instructions:
For locally hosted models using `Ollama`, `LM Studio`, or `Foundry Local`, follow these instructions:
- [Ollama](ollama.md)
- [LM Studio](lmstudio.md)
- [Foundry Local](foundry_local.md)

Unless otherwise stated, these guides have not been endorsed by the providers.

Expand Down
107 changes: 107 additions & 0 deletions guides/foundry_local.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Foundry Local

[Foundry Local](https://learn.microsoft.com/azure/ai-foundry/foundry-local/) runs
open-source models directly on your device through Microsoft's on-device runtime.
It exposes an OpenAI-compatible API, so you can use the `aisuite` interface for
chat completions. No API key is needed for these locally hosted models.

There are two ways to use the `foundry_local` provider.

## Managed mode (recommended)

Install [Foundry Local](https://learn.microsoft.com/azure/ai-foundry/foundry-local/get-started)
and the Foundry Local Python SDK. The SDK is included in the `foundry-local` extra
(`pip install 'aisuite[foundry-local]'`) on Python 3.11+. You can also install it
directly, picking the package that matches your hardware
(see the [SDK reference](https://learn.microsoft.com/azure/foundry-local/reference/reference-sdk-current?pivots=programming-language-python)):

```shell
pip install foundry-local-sdk
```

In managed mode, `aisuite` uses the SDK to start the local service on demand,
download and load the requested model, and resolve the model alias (e.g.
`phi-3.5-mini`) to the concrete model id served by the endpoint. Because Foundry
Local picks a dynamic port, the SDK is the easiest way to discover the endpoint.
Both the current `foundry-local-sdk` (imported as `foundry_local_sdk`) and the
legacy 0.x package (imported as `foundry_local`) are supported.

Sample code:
```python
import aisuite as ai

def main():
    client = ai.Client()
    messages = [
        {"role": "system", "content": "Be verbose"},
        {"role": "user", "content": "What is the golden ratio?"},
    ]

    # Use a Foundry Local model alias.
    foundry_phi = "foundry_local:phi-3.5-mini"

    response = client.chat.completions.create(
        model=foundry_phi,
        messages=messages,
        temperature=0.75,
    )
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()
```

## Explicit endpoint mode

If you already have the Foundry Local service running, you can point `aisuite`
directly at its OpenAI-compatible endpoint via the `api_url`/`base_url` config key
or the `FOUNDRY_LOCAL_API_URL` environment variable. The SDK is not required in
this mode. You can use either a model alias (e.g. `phi-3.5-mini`) — `aisuite`
resolves it to the concrete id the endpoint serves by querying its `/v1/models`
list — or pass a concrete model id directly. Make sure the model is already
loaded on the running service (e.g. `foundry model run phi-3.5-mini`).

```python
import aisuite as ai

def main():
    client = ai.Client(
        provider_configs={
            "foundry_local": {
                "api_url": "http://localhost:5273",
                "timeout": 300,
            }
        }
    )
    messages = [
        {"role": "user", "content": "What is the golden ratio?"},
    ]

    response = client.chat.completions.create(
        # A friendly alias is resolved against the endpoint; a concrete id
        # (e.g. "Phi-3.5-mini-instruct-generic-cpu") also works.
        model="foundry_local:phi-3.5-mini",
        messages=messages,
        temperature=0.75,
    )
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()
```

## Tool (function) calling

Foundry Local exposes an OpenAI-compatible API, so tool calls flow through
`aisuite` unchanged for models that support them. Passing `tools=` to
`client.chat.completions.create(...)` currently requires the `mcp` extra to be
installed, otherwise the call fails with `NameError: name 'is_mcp_config' is not
defined` (this affects every provider, not just Foundry Local):

```shell
pip install 'aisuite[mcp]'
```

Happy coding! If you’d like to contribute, please read our [Contributing Guide](../CONTRIBUTING.md).
7 changes: 4 additions & 3 deletions platform/coworker/providers/capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ def capabilities_for(model: str) -> ModelCapabilities:
provider = model.split(":", 1)[0].lower() if ":" in model else ""
name = model.split(":", 1)[-1].lower() # strip a provider prefix if present

# Ollama (local) models vary widely and many fake/mishandle parallel tool calls — assume
# tools work (we only point at tool-capable models) but stay conservative otherwise.
if provider == "ollama":
# Ollama / Foundry Local (local, OpenAI-compatible) models vary widely and many fake or
# mishandle parallel tool calls — assume tools work (we only point at tool-capable models)
# but stay conservative otherwise.
if provider in ("ollama", "foundry_local"):
return ModelCapabilities(
tools=True, vision=False, parallel_tool_calls=False, streaming=True
)
Expand Down
Loading