Moving things into LMI #446
base: main
Changes from all commits
@@ -0,0 +1,279 @@
| """Configuration types for LMI. | ||||||||||||||||||||
|
|
||||||||||||||||||||
| An `LLMConfig` is an ordered chain of `ModelSpec` entries. `models[0]` is the | ||||||||||||||||||||
| primary model; `models[1:]` are fallbacks tried in order when the primary | ||||||||||||||||||||
| fails in ways that another model might handle. | ||||||||||||||||||||
|
|
||||||||||||||||||||
| `LLMConfig.from_legacy_dict` accepts the dict-shaped configuration | ||||||||||||||||||||
| (`{model_list, fallbacks, router_kwargs}`) that mirrors litellm's Router layout. | ||||||||||||||||||||
| """ | ||||||||||||||||||||
|
|
||||||||||||||||||||
| from __future__ import annotations | ||||||||||||||||||||
|
|
||||||||||||||||||||
| from collections.abc import Awaitable, Callable | ||||||||||||||||||||
| from typing import Annotated, Any | ||||||||||||||||||||
|
|
||||||||||||||||||||
| import litellm | ||||||||||||||||||||
| from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, SecretStr | ||||||||||||||||||||
|
|
||||||||||||||||||||
| from lmi.constants import DEFAULT_VERTEX_SAFETY_SETTINGS | ||||||||||||||||||||
| from lmi.types import LLMResult | ||||||||||||||||||||
|
|
||||||||||||||||||||
| ResponseValidator = Callable[[LLMResult], Awaitable[None] | None] | ||||||||||||||||||||
|
|
||||||||||||||||||||
| _DEFAULT_TEMPERATURE = 1.0 | ||||||||||||||||||||
| _OPENAI_ONLY_PARAMS = frozenset({"logprobs", "top_logprobs"}) | ||||||||||||||||||||
|
|
||||||||||||||||||||
| # Per-call retry kwargs that LiteLLM honors via its own internal retry loop. LMI | ||||||||||||||||||||
| # owns retries through `_run_with_fallbacks` + `ModelSpec.max_retries`, so these | ||||||||||||||||||||
| # must never reach `litellm.acompletion`/`litellm.aresponses` regardless of how | ||||||||||||||||||||
| # they ended up in `ModelSpec.extra_params`. | ||||||||||||||||||||
|
Collaborator (suggested change):
Felt we should clarify that …
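For readers of the thread, a minimal sketch of the stripping the comment block above describes (`_sanitize_extra_params` is a hypothetical helper name; only `_LITELLM_RETRY_KWARGS` comes from the diff):

```python
_LITELLM_RETRY_KWARGS = frozenset({"num_retries", "max_retries"})

def _sanitize_extra_params(extra_params: dict) -> dict:
    # LMI owns retries via ModelSpec.max_retries, so LiteLLM's own per-call
    # retry kwargs must never be forwarded to acompletion/aresponses.
    return {k: v for k, v in extra_params.items() if k not in _LITELLM_RETRY_KWARGS}
```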
_LITELLM_RETRY_KWARGS = frozenset({"num_retries", "max_retries"})


class ModelSpec(BaseModel):
    """One model in an `LLMConfig` chain."""

    model_config = ConfigDict(extra="forbid")

    name: str = Field(
        description=(
            "LiteLLM model string, e.g. 'gpt-4o-mini' or 'claude-3-5-sonnet-20241022'."
Collaborator:
But don't LiteLLM model strings start with like …?
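For context on the question: LiteLLM resolves bare OpenAI model names without a prefix, while most other providers are addressed with a `provider/` prefix. A quick check, assuming `litellm` is installed (the return values shown are my understanding of the library, not taken from this diff):

```python
import litellm

# get_llm_provider returns (model, provider, dynamic_api_key, api_base).
print(litellm.get_llm_provider(model="gpt-4o-mini")[1])            # "openai"
print(litellm.get_llm_provider(model="gemini/gemini-1.5-pro")[1])  # "gemini"
```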
        ),
    )
    api_base: str | None = None
    api_key: SecretStr | None = None
    timeout: float = Field(default=60.0, description="Per-request timeout in seconds.")
    max_retries: int = Field(
        default=3,
        description=(
            "Retries against this model before falling over to the next entry"
            " in the chain."
        ),
    )
    extra_params: dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Pass-through kwargs for litellm.acompletion / litellm.aresponses,"
            " e.g. temperature, max_tokens, safety_settings, vertex_project."
        ),
    )
    responses_api: bool = Field(
        default=False,
        description=(
            "If True, dispatch this model via OpenAI's stateful Responses API"
            " (`litellm.aresponses`) instead of the Chat Completions API"
            " (`litellm.acompletion`)."
        ),
    )

    @classmethod
    def from_name(cls, name: str, **overrides: Any) -> ModelSpec:
        """Build a `ModelSpec` with provider-aware defaults for `extra_params`.

        Applies: Gemini default safety settings; `temperature` / `max_tokens`
        defaults; and silent drop of `logprobs` / `top_logprobs` for non-OpenAI
        providers (which don't support them). Explicit values in `overrides`
        always win over the defaults.
Comment on lines +74 to +77

Suggested change:
-        Applies: Gemini default safety settings; `temperature` / `max_tokens`
-        defaults; and silent drop of `logprobs` / `top_logprobs` for non-OpenAI
-        providers (which don't support them). Explicit values in `overrides`
-        always win over the defaults.
+        Applies: Gemini default safety settings and `temperature` /
+        `max_tokens` defaults. `logprobs` and `top_logprobs` are treated as
+        OpenAI-only parameters: passing them for non-OpenAI providers raises
+        `ValueError`. Explicit values in `overrides` always win over the
+        defaults.
Consider using litellm for this instead of a DIY'd helper; it can detect this using `litellm.get_llm_provider`.
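A minimal sketch of that approach (`_drop_openai_only_params` is a hypothetical helper name; `_OPENAI_ONLY_PARAMS` comes from the diff):

```python
import litellm

_OPENAI_ONLY_PARAMS = frozenset({"logprobs", "top_logprobs"})

def _drop_openai_only_params(name: str, params: dict) -> dict:
    """Silently drop OpenAI-only kwargs when the resolved provider isn't OpenAI."""
    _, provider, _, _ = litellm.get_llm_provider(model=name)
    if provider == "openai":
        return params
    return {k: v for k, v in params.items() if k not in _OPENAI_ONLY_PARAMS}
```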
Gemini supports these too; can you have Gemini work with these as well?
Should we do this validation in a field/model_validator, so it applies to every `ModelSpec`, not just this classmethod?
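A sketch of what that validator could look like (the provider check here is a deliberately crude stand-in; per the comment above, it could instead use `litellm.get_llm_provider`):

```python
from typing import Any

from pydantic import BaseModel, Field, model_validator

_OPENAI_ONLY_PARAMS = frozenset({"logprobs", "top_logprobs"})


class ModelSpec(BaseModel):
    name: str
    extra_params: dict[str, Any] = Field(default_factory=dict)

    @model_validator(mode="after")
    def _reject_openai_only_params(self) -> "ModelSpec":
        # Runs on every construction path, not just the from_name classmethod.
        bad = _OPENAI_ONLY_PARAMS & self.extra_params.keys()
        if bad and not self.name.startswith(("gpt-", "openai/")):  # crude provider check
            raise ValueError(f"{sorted(bad)} unsupported for model {self.name!r}")
        return self
```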
Any chance we can not inject the `60.0` and `3` defaults here? The less injection we have, the better.
Copilot AI (Apr 18, 2026):
LLMConfig.from_legacy_dict() can pass an empty dict into _spec_from_legacy_params() when `fallbacks` references a `model_name` that isn't present in `model_list`, which will then fail with a KeyError on `params['model']`. Consider validating that all referenced fallback model names exist (and raising a clear ValueError listing the missing names) before constructing the ordered chain.
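A sketch of that validation, treating `fallbacks` as a flat list of names for simplicity (`_validate_fallback_names` is illustrative, not in the diff):

```python
def _validate_fallback_names(model_list: list[dict], fallbacks: list[str]) -> None:
    """Fail fast with the missing names instead of a later KeyError."""
    known = {entry["model_name"] for entry in model_list}
    missing = sorted(set(fallbacks) - known)
    if missing:
        raise ValueError(
            f"fallbacks reference unknown model_name(s) {missing}; "
            f"known names: {sorted(known)}"
        )
```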
Can we make this a classmethod of ModelSpec? Then we can get `Self` as the return type.
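Roughly what that buys, assuming Python 3.11+ for `typing.Self` (or `typing_extensions` on older versions):

```python
from typing import Any, Self

from pydantic import BaseModel


class ModelSpec(BaseModel):
    name: str

    @classmethod
    def from_name(cls, name: str, **overrides: Any) -> Self:
        # Subclasses of ModelSpec inherit the correct return type for free.
        return cls(name=name, **overrides)
```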
@@ -1,8 +1,5 @@
import os
from sys import version_info

USE_RESPONSES_API = os.environ.get("USE_RESPONSES_API", "").lower() in {"1", "true"}
Collaborator:
Nice, this is awesome.
# Estimate from OpenAI's FAQ
# https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
CHARACTERS_PER_TOKEN_ASSUMPTION: float = 4.0
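In case it helps reviewers, what that heuristic implies in use (`estimate_tokens` is illustrative, not part of the diff):

```python
CHARACTERS_PER_TOKEN_ASSUMPTION: float = 4.0

def estimate_tokens(text: str) -> int:
    """Rough token count via the ~4 characters/token rule of thumb."""
    return max(1, round(len(text) / CHARACTERS_PER_TOKEN_ASSUMPTION))

assert estimate_tokens("what are tokens?") == 4  # 16 characters / 4.0
```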
@@ -1,2 +1,46 @@
from typing import Any


class JSONSchemaValidationError(ValueError):
    """Raised when the completion does not match the specified schema."""


class ModelRefusalError(RuntimeError):
    """Raised when an LLM declines to complete a request (e.g. content filter).

    Carries the raw provider response so callers that choose to handle the
    refusal (rather than fall back) can still inspect it.
    """

    def __init__(
        self,
        message: str,
        *,
        model: str,
        finish_reason: str | None,
        response: Any = None,
    ) -> None:
        super().__init__(message)
        self.model = model
        self.finish_reason = finish_reason
        self.response = response


class ResponseValidationError(RuntimeError):
    """Raised when an `LLMConfig.response_validator` rejects an `LLMResult`.

    Treated as transient by the retry/fallback loop so the validator gets a
    fresh attempt at the same model (up to `ModelSpec.max_retries`) before
    advancing to the next model.
    """


class AllModelsExhaustedError(RuntimeError):
    """Raised when every model in an `LLMConfig.models` chain has failed or been skipped."""

    def __init__(self, last_exc: BaseException | None = None) -> None:
        super().__init__(
            "All models in the LLMConfig chain failed."
            + (f" Last error: {last_exc!r}" if last_exc is not None else "")
        )
        self.last_exc = last_exc
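A hedged sketch of caller-side handling of these exceptions (`config.complete` is a stand-in for LMI's eventual entry point, not an API from this diff):

```python
import logging

log = logging.getLogger(__name__)


async def ask(config, messages):
    try:
        return await config.complete(messages)  # hypothetical entry point
    except ModelRefusalError as exc:
        # The raw provider response is preserved on the exception for inspection.
        log.warning("refused by %s (%s)", exc.model, exc.finish_reason)
        raise
    except AllModelsExhaustedError as exc:
        # Every ModelSpec in the chain failed or was skipped; last_exc is kept.
        raise RuntimeError("LLM chain exhausted") from exc.last_exc
```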
I think rebase atop `main`, as this has been changed a bit.