Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions deeptutor/services/llm/provider_core/azure_openai_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from deeptutor.services.llm.provider_core.base import LLMProvider, LLMResponse
from deeptutor.services.llm.provider_core.openai_responses import (
adapt_chat_kwargs_to_responses,
consume_sdk_stream,
convert_messages,
convert_tools,
Expand Down Expand Up @@ -122,7 +123,7 @@ async def chat(
reasoning_effort,
tool_choice,
)
body.update({k: v for k, v in extra_kwargs.items() if v is not None})
body.update(adapt_chat_kwargs_to_responses(extra_kwargs))
try:
return parse_response_output(await self._client.responses.create(**body))
except Exception as exc:
Expand Down Expand Up @@ -150,7 +151,7 @@ async def chat_stream(
reasoning_effort,
tool_choice,
)
body.update({k: v for k, v in extra_kwargs.items() if v is not None})
body.update(adapt_chat_kwargs_to_responses(extra_kwargs))
body["stream"] = True
idle_timeout_s = 90

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from deeptutor.services.llm.capabilities import disable_response_format_at_runtime
from deeptutor.services.llm.provider_core.base import LLMProvider, LLMResponse, ToolCallRequest
from deeptutor.services.llm.provider_core.openai_responses import (
adapt_chat_kwargs_to_responses,
consume_sdk_stream,
convert_messages,
convert_tools,
Expand Down Expand Up @@ -694,7 +695,7 @@ async def chat(
reasoning_effort,
tool_choice,
)
body.update({k: v for k, v in extra_kwargs.items() if v is not None})
body.update(adapt_chat_kwargs_to_responses(extra_kwargs))
result = parse_response_output(await self._client.responses.create(**body))
self._record_responses_success(model, reasoning_effort)
return result
Expand Down Expand Up @@ -777,7 +778,7 @@ async def chat_stream(
reasoning_effort,
tool_choice,
)
body.update({k: v for k, v in extra_kwargs.items() if v is not None})
body.update(adapt_chat_kwargs_to_responses(extra_kwargs))
body["stream"] = True
stream = await self._client.responses.create(**body)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
"""Shared helpers for Responses API providers."""

from .converters import convert_messages, convert_tools, convert_user_message, split_tool_call_id
from .converters import (
adapt_chat_kwargs_to_responses,
convert_messages,
convert_tools,
convert_user_message,
split_tool_call_id,
)
from .parsing import (
FINISH_REASON_MAP,
consume_sdk_stream,
Expand All @@ -11,6 +17,7 @@
)

__all__ = [
"adapt_chat_kwargs_to_responses",
"convert_messages",
"convert_tools",
"convert_user_message",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,30 @@ def split_tool_call_id(tool_call_id: Any) -> tuple[str, str | None]:
return call_id, item_id or None
return tool_call_id, None
return "call_0", None


def adapt_chat_kwargs_to_responses(extra_kwargs: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *extra_kwargs* that is safe to merge into a Responses request.

    Chat Completions callers may supply ``max_completion_tokens`` for newer
    OpenAI models (o1/o3/gpt-4o/gpt-5.x). The Responses API names that limit
    ``max_output_tokens`` and rejects the Chat Completions spelling, which
    makes the OpenAI SDK raise ``TypeError`` from ``responses.create`` before
    any HTTP request is sent. See DeepTutor#437.

    Rules applied:

    * entries whose value is ``None`` are omitted (same as the merge filter
      this helper replaces);
    * ``max_completion_tokens`` is never forwarded under its own name;
    * its value is re-emitted as ``max_output_tokens`` unless the caller
      already provided a non-``None`` ``max_output_tokens``, which wins.

    The input mapping is left untouched; a new dict is returned.
    """
    # Everything except None values and the Chat Completions-only key
    # passes straight through, preserving the caller's key order.
    adapted: dict[str, Any] = {
        name: val
        for name, val in extra_kwargs.items()
        if val is not None and name != "max_completion_tokens"
    }

    alias_value = extra_kwargs.get("max_completion_tokens")
    if alias_value is not None:
        # setdefault keeps an explicit max_output_tokens from the caller.
        adapted.setdefault("max_output_tokens", alias_value)
    return adapted
45 changes: 45 additions & 0 deletions tests/services/llm/test_openai_responses_converters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Tests for the Responses API converter helpers."""

from __future__ import annotations

from deeptutor.services.llm.provider_core.openai_responses import (
adapt_chat_kwargs_to_responses,
)


class TestAdaptChatKwargsToResponses:
    """Behavioral checks for ``adapt_chat_kwargs_to_responses``."""

    def test_passes_through_unrelated_kwargs(self) -> None:
        """Keys with no Responses-specific handling come back unchanged."""
        chat_kwargs = {"temperature": 0.2, "tool_choice": "auto"}
        expected = {"temperature": 0.2, "tool_choice": "auto"}
        assert adapt_chat_kwargs_to_responses(chat_kwargs) == expected

    def test_drops_none_values(self) -> None:
        """Entries set to ``None`` are omitted from the output."""
        chat_kwargs = {"temperature": 0.2, "response_format": None}
        assert adapt_chat_kwargs_to_responses(chat_kwargs) == {"temperature": 0.2}

    def test_translates_max_completion_tokens_to_max_output_tokens(self) -> None:
        """The Chat Completions token limit is renamed for the Responses API."""
        # Regression for DeepTutor#437: gpt-5.x callers pass
        # `max_completion_tokens` from `get_token_limit_kwargs(model, n)`,
        # but the Responses API only accepts `max_output_tokens`.
        chat_kwargs = {"max_completion_tokens": 8192, "temperature": 0.2}
        adapted = adapt_chat_kwargs_to_responses(chat_kwargs)
        assert adapted == {"max_output_tokens": 8192, "temperature": 0.2}
        assert "max_completion_tokens" not in adapted

    def test_drops_max_completion_tokens_when_none(self) -> None:
        """A ``None`` limit is dropped rather than translated."""
        chat_kwargs = {"max_completion_tokens": None, "temperature": 0.2}
        assert adapt_chat_kwargs_to_responses(chat_kwargs) == {"temperature": 0.2}

    def test_explicit_max_output_tokens_wins_over_alias(self) -> None:
        """An explicit Responses-style limit takes precedence over the alias."""
        # If the caller already set the Responses API name explicitly, do not
        # overwrite it with the chat-completions alias value.
        chat_kwargs = {"max_completion_tokens": 8192, "max_output_tokens": 4096}
        adapted = adapt_chat_kwargs_to_responses(chat_kwargs)
        assert adapted == {"max_output_tokens": 4096}

    def test_empty_input_returns_empty_dict(self) -> None:
        """No kwargs in, no kwargs out."""
        adapted = adapt_chat_kwargs_to_responses({})
        assert adapted == {}

    def test_does_not_mutate_input(self) -> None:
        """The caller's dict is left exactly as it was passed in."""
        source = {"max_completion_tokens": 8192, "temperature": 0.2}
        snapshot = dict(source)
        adapt_chat_kwargs_to_responses(source)
        assert source == snapshot
Loading