-
Notifications
You must be signed in to change notification settings - Fork 145
feat(reasoning): bucket max_tokens to effort on adaptive Opus #2753
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,63 @@ import { transformGoogleMessages } from "./transform-google-messages.js"; | |
|
|
||
| type OpenAIImageQuality = "low" | "medium" | "high" | "auto"; | ||
|
|
||
| type AdaptiveEffort = "low" | "medium" | "high" | "xhigh" | "max"; | ||
|
|
||
/**
 * Picks the `output_config.effort` value to send to adaptive-thinking
 * Anthropic models (Opus 4.6+).
 *
 * The first control that is present wins, in this order:
 *   1. `effort`               - returned unchanged.
 *   2. `reasoning_effort`     - translated onto the adaptive scale.
 *   3. `reasoning_max_tokens` - bucketed into an effort level, because
 *      adaptive models reject `budget_tokens`; the requested budget is turned
 *      into a depth hint rather than being dropped.
 *
 * @param effort Explicit effort level, if the caller supplied one.
 * @param reasoning_effort Generic reasoning-effort level, if supplied.
 * @param reasoning_max_tokens Requested reasoning token budget, if supplied.
 * @returns The adaptive effort level, or `undefined` when none of the three
 *   controls was sent, which leaves the model at its default depth.
 */
function resolveAdaptiveEffort(
	effort: "low" | "medium" | "high" | undefined,
	reasoning_effort:
		| "none"
		| "minimal"
		| "low"
		| "medium"
		| "high"
		| "xhigh"
		| "max"
		| undefined,
	reasoning_max_tokens: number | undefined,
): AdaptiveEffort | undefined {
	// 1. An explicit effort always takes precedence.
	if (effort !== undefined) {
		return effort;
	}

	// 2. Translate `reasoning_effort`. Only the levels that do not map to
	// "high" are listed; every other value ("high" itself, and also "none")
	// resolves to "high".
	// NOTE(review): "none" resolving to "high" looks surprising; presumably
	// callers only reach this function when reasoning is enabled - confirm.
	if (reasoning_effort) {
		const translated = new Map<string, AdaptiveEffort>([
			["minimal", "low"],
			["low", "low"],
			["medium", "medium"],
			["xhigh", "xhigh"],
			["max", "max"],
		]);
		return translated.get(reasoning_effort) ?? "high";
	}

	// 3. Bucket a token budget. Each entry is an exclusive upper bound paired
	// with the level used for budgets below it; budgets at or above the last
	// bound resolve to "xhigh".
	if (reasoning_max_tokens === undefined) {
		return undefined;
	}
	const budgetBuckets: ReadonlyArray<readonly [number, AdaptiveEffort]> = [
		[2000, "low"],
		[8000, "medium"],
		[24000, "high"],
	];
	for (const [upperBound, level] of budgetBuckets) {
		if (reasoning_max_tokens < upperBound) {
			return level;
		}
	}
	return "xhigh";
}
|
|
||
| function getProviderMapping( | ||
| modelDef: ModelDefinition | undefined, | ||
| usedProvider: ProviderId, | ||
|
|
@@ -1908,28 +1965,21 @@ export async function prepareRequestBody( | |
| // thinking text — their default flipped to "omitted" (empty thinking, | ||
| // signature only), unlike Opus 4.6 which defaults to "summarized". | ||
| requestBody.thinking = { type: "adaptive", display: "summarized" }; | ||
| if (effort === undefined && reasoning_effort) { | ||
| const mapEffort = ( | ||
| e: typeof reasoning_effort, | ||
| ): "low" | "medium" | "high" | "xhigh" | "max" => { | ||
| switch (e) { | ||
| case "minimal": | ||
| case "low": | ||
| return "low"; | ||
| case "medium": | ||
| return "medium"; | ||
| case "high": | ||
| return "high"; | ||
| case "xhigh": | ||
| return "xhigh"; | ||
| case "max": | ||
| return "max"; | ||
| default: | ||
| return "high"; | ||
| } | ||
| }; | ||
| requestBody.output_config ??= {}; | ||
| requestBody.output_config.effort = mapEffort(reasoning_effort); | ||
| // Explicit `effort` is applied below alongside the other optional | ||
| // parameters; here we derive the adaptive depth from | ||
| // `reasoning_effort` or, failing that, bucket a requested | ||
| // `reasoning.max_tokens` into an effort level so the budget still | ||
| // influences depth instead of being silently dropped. | ||
| if (effort === undefined) { | ||
| const adaptiveEffort = resolveAdaptiveEffort( | ||
| undefined, | ||
| reasoning_effort, | ||
| reasoning_max_tokens, | ||
| ); | ||
| if (adaptiveEffort !== undefined) { | ||
| requestBody.output_config ??= {}; | ||
| requestBody.output_config.effort = adaptiveEffort; | ||
|
Comment on lines
+1983
to
+1985
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When an adaptive Anthropic request includes both Useful? React with 👍 / 👎. |
||
| } | ||
| } | ||
| } else { | ||
| requestBody.thinking = { | ||
|
|
@@ -2380,28 +2430,11 @@ export async function prepareRequestBody( | |
| type: "adaptive", | ||
| display: "summarized", | ||
| }; | ||
| const mapEffort = ( | ||
| e: typeof reasoning_effort, | ||
| ): "low" | "medium" | "high" | "xhigh" | "max" => { | ||
| switch (e) { | ||
| case "minimal": | ||
| case "low": | ||
| return "low"; | ||
| case "medium": | ||
| return "medium"; | ||
| case "high": | ||
| return "high"; | ||
| case "xhigh": | ||
| return "xhigh"; | ||
| case "max": | ||
| return "max"; | ||
| default: | ||
| return "high"; | ||
| } | ||
| }; | ||
| const adaptiveEffort = | ||
| effort ?? | ||
| (reasoning_effort ? mapEffort(reasoning_effort) : undefined); | ||
| const adaptiveEffort = resolveAdaptiveEffort( | ||
| effort, | ||
| reasoning_effort, | ||
| reasoning_max_tokens, | ||
| ); | ||
| if (adaptiveEffort !== undefined) { | ||
| requestBody.additionalModelRequestFields.output_config = { | ||
| effort: adaptiveEffort, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fresh evidence in this patch is the new `reasoning_max_tokens` bucketing call here, but gateway traffic still cannot reach it for `claude-opus-4-6`/`4-7`/`4-8`: `validateModelCapabilities()` only accepts `reasoning.max_tokens` when a mapping has `reasoningMaxTokens === true`, and the adaptive Opus mappings only declare `reasoningMode: "adaptive"`; auto-routing has the same `provider.reasoningMaxTokens !== true` filter. As a result, requests like `model: "anthropic/claude-opus-4-8"` with `reasoning.max_tokens` are rejected/filtered before `prepareRequestBody` can translate the budget into `output_config.effort`.
Useful? React with 👍 / 👎.