matdev83
diff --git a/src/anthropic_converters.py b/src/anthropic_converters.py
Lines changed: 22 additions & 0 deletions
diff --git a/src/connectors/openai_codex/executor.py b/src/connectors/openai_codex/executor.py
Lines changed: 8 additions & 6 deletions
diff --git a/src/core/app/controllers/__init__.py b/src/core/app/controllers/__init__.py
Lines changed: 11 additions & 6 deletions
diff --git a/src/core/domain/gemini_translation.py b/src/core/domain/gemini_translation.py
Lines changed: 32 additions & 11 deletions
diff --git a/src/core/domain/translators/responses/streaming.py b/src/core/domain/translators/responses/streaming.py
Lines changed: 13 additions & 2 deletions
diff --git a/src/core/services/backend_request_manager/streaming_response_handler.py b/src/core/services/backend_request_manager/streaming_response_handler.py
Lines changed: 8 additions & 0 deletions
diff --git a/src/core/services/translation_service.py b/src/core/services/translation_service.py
Lines changed: 3 additions & 0 deletions
diff --git a/tests/chat_completions_tests/test_anthropic_frontend.py b/tests/chat_completions_tests/test_anthropic_frontend.py
Lines changed: 70 additions & 28 deletions
@@ -888,6 +888,19 @@ def _flatten_tool_result_content(content: Any) -> str:
     return "" if content is None else str(content)
 
 
+def _openai_error_to_anthropic_error(error_payload: Any) -> dict[str, Any]:
+    """Convert an OpenAI-style error payload into an Anthropic error object.
+
+    Args:
+        error_payload: The value of an OpenAI chunk's ``error`` field. A dict
+            is read for ``message`` and ``type``/``code``; any other value is
+            stringified and used as the message.
+
+    Returns:
+        A dict with string ``type`` and ``message`` entries. For dict
+        payloads, ``type`` falls back to ``code`` and then to ``"api_error"``,
+        and ``message`` falls back to ``"Upstream request failed."`` whenever
+        the stored value is missing or falsy.
+    """
+    if not isinstance(error_payload, dict):
+        return {"type": "api_error", "message": str(error_payload)}
+
+    # Prefer the error ``type``; use ``code`` only when ``type`` is absent/empty.
+    kind = error_payload.get("type") or error_payload.get("code") or "api_error"
+    text = error_payload.get("message") or "Upstream request failed."
+    return {"type": str(kind), "message": str(text)}
+
+
 async def openai_stream_to_anthropic_stream(
     chunk_generator: AsyncGenerator[bytes, None],
     request: AnthropicMessagesRequest,
@@ -983,6 +996,15 @@ def _translate_payload(payload_str: str) -> PayloadTranslationResult:
         if logger.isEnabledFor(TRACE_LEVEL):
             logger.log(TRACE_LEVEL, f"PARSED_CHUNK: {openai_chunk}")
 
+        if isinstance(openai_chunk.get("error"), dict):
+            error_payload = {
+                "type": "error",
+                "error": _openai_error_to_anthropic_error(openai_chunk["error"]),
+            }
+            error_event = f"event: error\ndata: {json.dumps(error_payload, ensure_ascii=False, separators=(',', ':'))}\n\n"
+            events.append(error_event)
+            return PayloadTranslationResult(is_done_marker=True, events=events)
+
         if not choices:
             usage = openai_chunk.get("usage")
             if isinstance(usage, dict):
 
@@ -1938,12 +1938,14 @@ def _normalize_processed_stream_chunk(
         if not content_dict:
             return chunk
 
-        if event_type in ("response.done", "response.completed"):
-            content_dict = {"type": "response.completed", "response": content_dict}
-        elif "choices" in content_dict or not str(
-            content_dict.get("type") or ""
-        ).startswith("response."):
-            return chunk
+        if event_type in ("response.done", "response.completed"):
+            content_dict = {"type": "response.completed", "response": content_dict}
+        elif event_type == "error":
+            content_dict = {"type": "error", **content_dict}
+        elif "choices" in content_dict or not str(
+            content_dict.get("type") or ""
+        ).startswith("response."):
+            return chunk
 
         translation_service = getattr(self._base_connector, "translation_service", None)
         if translation_service is None:
 
@@ -1160,13 +1160,15 @@ async def gemini_stream_generate_content(
             # Get backend service
             backend_service = service_provider.get_required_service(IBackendService)  # type: ignore[type-abstract]
 
+            backend_result = await backend_service.call_completion(
+                domain_request, stream=True, context=ctx
+            )
+            stream_status_code = getattr(backend_result, "status_code", 200)
+
             async def generate_stream() -> AsyncGenerator[bytes, None]:
 
                 try:
-                    # Call the backend service
-                    result = await backend_service.call_completion(
-                        domain_request, stream=True, context=ctx
-                    )
+                    result = backend_result
 
                     if hasattr(result, "content") and hasattr(
                         result.content, "__aiter__"
@@ -1329,7 +1331,11 @@ async def _empty_stream() -> AsyncIterator[Any]:
                         e,
                         exc_info=True,
                     )
-            return StreamingResponse(stream_iter, media_type="text/event-stream")
+            return StreamingResponse(
+                stream_iter,
+                media_type="text/event-stream",
+                status_code=stream_status_code,
+            )
         except Exception as e:
             logger.exception(
                 f"Error in Gemini stream generate content: {e}", exc_info=True
@@ -1385,7 +1391,6 @@ async def anthropic_models(
                 BackendRoutingService,
             )
 
-
             dummy_response = DummyResponse()
             routing_service = service_provider.get_required_service(
                 BackendRoutingService
 
@@ -356,7 +356,7 @@ def gemini_request_to_canonical_request(
     )
 
 
-def _map_finish_reason_to_gemini(finish_reason: str | None) -> str:
+def _map_finish_reason_to_gemini(finish_reason: str | None) -> str:
     """Map canonical finish reason to Gemini finish reason format.
 
     Gemini API uses: STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER, FINISH_REASON_UNSPECIFIED
@@ -377,23 +377,44 @@ def _map_finish_reason_to_gemini(finish_reason: str | None) -> str:
         "recitation": "RECITATION",
         "other": "OTHER",
     }
-    return mapping.get(finish_reason_lower, finish_reason.upper())
-
-
-def canonical_response_to_gemini_response(
-    response: dict[str, Any], is_streaming: bool = False
-) -> dict[str, Any]:
+    return mapping.get(finish_reason_lower, finish_reason.upper())
+
+
+def _canonical_error_to_gemini_error(error_payload: Any) -> dict[str, Any]:
+    """Wrap a canonical error payload in a Gemini-style ``error`` envelope.
+
+    Args:
+        error_payload: The canonical ``error`` value. A dict is read for
+            ``code`` and ``message``; any other value is stringified and used
+            as the message.
+
+    Returns:
+        ``{"error": {"code": ..., "message": ..., "status": ...}}``. A
+        ``code`` of ``"context_length_exceeded"`` yields 400 /
+        ``INVALID_ARGUMENT``; everything else yields 500 / ``UNKNOWN``.
+    """
+    if isinstance(error_payload, dict):
+        error_code = error_payload.get("code")
+        text = error_payload.get("message") or "Upstream request failed."
+    else:
+        error_code = None
+        text = str(error_payload)
+
+    # Only a context-length overflow is reported as a client error here.
+    if error_code == "context_length_exceeded":
+        http_code, status = 400, "INVALID_ARGUMENT"
+    else:
+        http_code, status = 500, "UNKNOWN"
+    return {"error": {"code": http_code, "message": str(text), "status": status}}
+
+
+def canonical_response_to_gemini_response(
+    response: dict[str, Any], is_streaming: bool = False
+) -> dict[str, Any]:
     """
     Convert a canonical response to Gemini API format.
 
     Args:
         response: Canonical response in OpenAI format
         is_streaming: Whether this is a streaming response
 
-    Returns:
-        Response in Gemini API format
-    """
-    if not is_streaming:
+    Returns:
+        Response in Gemini API format
+    """
+    if "error" in response:
+        return _canonical_error_to_gemini_error(response["error"])
+
+    if not is_streaming:
         # Non-streaming response
         candidates = []
 
 
@@ -299,6 +299,17 @@ def _build_chunk(
             ],
         }
 
+    def _build_error_chunk(error_payload: Any) -> dict[str, Any]:
+        """Build a chunk via ``_build_chunk`` and attach an ``error`` dict to it.
+
+        Dict payloads are shallow-copied before defaults are applied, so the
+        caller's dict is left unmodified; any other payload is stringified into
+        ``message``. Absent ``message``/``type`` keys receive default values.
+        """
+        if isinstance(error_payload, dict):
+            error_info = {**error_payload}
+            # Fill in only the keys that are absent; present values are kept as-is.
+            for key, fallback in (
+                ("message", "Responses stream reported failure"),
+                ("type", "api_error"),
+            ):
+                error_info.setdefault(key, fallback)
+        else:
+            error_info = {"message": str(error_payload), "type": "api_error"}
+
+        chunk_with_error = _build_chunk({}, "error")
+        chunk_with_error["error"] = error_info
+        return chunk_with_error
+
     if event_type == "response.output_text.delta":
         delta_payload = chunk.get("delta")
         text = _extract_text(delta_payload)
@@ -575,12 +586,12 @@ def _needs_accumulated_tool_arguments(val: Any) -> bool:
         created_delta["role"] = "assistant"
         return _build_chunk(created_delta or None)
 
-    if event_type == "response.failed":
+    if event_type in ("error", "response.failed"):
         response_info = chunk.get("response") or {}
         error_payload = response_info.get("error") or chunk.get("error") or {}
         reset_tool_call_state(response_info.get("id") or chunk_id)
         _active_responses_stream_id.set(None)
-        return {"error": "Responses stream reported failure", "details": error_payload}
+        return _build_error_chunk(error_payload)
 
     if event_type == "response.output_item.added":
         item = chunk.get("item") or {}
 
@@ -248,6 +248,14 @@ def _extract_terminal_error_status(chunk: Any) -> int | None:
                         default_unknown_error_status=502,
                     )
                 )
+            payload_details = content.get("details")
+            if payload_error and isinstance(payload_details, dict):
+                return (
+                    BackendStreamingResponseHandler._status_from_stream_error_payload(
+                        payload_details,
+                        default_unknown_error_status=502,
+                    )
+                )
 
             choices = content.get("choices")
             if isinstance(choices, list):
 
@@ -305,6 +305,9 @@ def to_domain_stream_chunk(
                 for c in choices_val
             ]
 
+        if "error" in result:
+            return result
+
         if logger.isEnabledFor(TRACE_LEVEL):
             result_type = type(result).__name__
             result_keys = list(result.keys())
 
@@ -1,30 +1,33 @@
-from collections.abc import AsyncGenerator
+import json
+from collections.abc import AsyncGenerator
 from unittest.mock import AsyncMock, patch
 
 import pytest
 
 pytestmark = pytest.mark.filterwarnings(
     "ignore:unclosed event loop <ProactorEventLoop.*:ResourceWarning"
 )
-from fastapi.testclient import TestClient
-from src.core.app.test_builder import build_test_app as build_app
-from src.core.config.app_config import (
-    AppConfig,
-    AuthConfig,
-    BackendConfig,
-    BackendSettings,
-    LoggingConfig,
-    SessionConfig,
-)
-from src.core.domain.chat import (
-    ChatCompletionChoice,
-    ChatCompletionChoiceMessage,
-)
-from src.core.domain.chat import (
-    ChatResponse as ChatCompletionResponse,
-)
-
-
+from fastapi.testclient import TestClient
+from src.anthropic_converters import openai_stream_to_anthropic_stream
+from src.anthropic_models import AnthropicMessagesRequest
+from src.core.app.test_builder import build_test_app as build_app
+from src.core.config.app_config import (
+    AppConfig,
+    AuthConfig,
+    BackendConfig,
+    BackendSettings,
+    LoggingConfig,
+    SessionConfig,
+)
+from src.core.domain.chat import (
+    ChatCompletionChoice,
+    ChatCompletionChoiceMessage,
+)
+from src.core.domain.chat import (
+    ChatResponse as ChatCompletionResponse,
+)
+
+
 @pytest.fixture()
 def anthropic_client():
     """Create TestClient with config patched for Anthropic."""
@@ -203,7 +206,7 @@ async def generator() -> AsyncGenerator[bytes, None]:
     return generator()
 
 
-def test_anthropic_messages_streaming_frontend(anthropic_client):
+def test_anthropic_messages_streaming_frontend(anthropic_client):
     with patch(
         "src.core.services.request_processor_service.RequestProcessor.process_request",
         new_callable=AsyncMock,
@@ -251,13 +254,52 @@ async def mock_streaming_generator():
                 text += chunk
             # Check that we get Anthropic streaming format
             assert "content_block_delta" in text or "delta" in text
-            assert "event: message_stop" in text
-            mock_process.assert_awaited_once()
-
-
-# ------------------------------------------------------------
-# Auth error
-# ------------------------------------------------------------
+            assert "event: message_stop" in text
+            mock_process.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_anthropic_stream_converts_openai_terminal_error_to_error_event():
+    """A terminal OpenAI error chunk is emitted as an Anthropic ``error`` event."""
+    upstream_error = {
+        "type": "invalid_request_error",
+        "code": "context_length_exceeded",
+        "message": "Your input exceeds the context window.",
+        "param": "input",
+    }
+    terminal_chunk = {
+        "id": "chatcmpl-context-length",
+        "object": "chat.completion.chunk",
+        "created": 123,
+        "model": "gpt-5.5",
+        "choices": [{"index": 0, "delta": {}, "finish_reason": "error"}],
+        "error": upstream_error,
+    }
+
+    async def source() -> AsyncGenerator[bytes, None]:
+        # Single SSE frame carrying both a finish_reason and an error object.
+        yield f"data: {json.dumps(terminal_chunk)}\n\n".encode()
+
+    request = AnthropicMessagesRequest(
+        model="claude-3-haiku-20240229",
+        max_tokens=128,
+        messages=[{"role": "user", "content": "Hello"}],
+        stream=True,
+    )
+
+    collected = []
+    async for event in openai_stream_to_anthropic_stream(
+        source(), request, request.model, "session-error"
+    ):
+        collected.append(event)
+    body = "".join(collected)
+
+    # An error event with the upstream message must be present ...
+    assert "event: error" in body
+    # ... the upstream ``code`` string must not appear in the output ...
+    assert "context_length_exceeded" not in body
+    assert "Your input exceeds the context window." in body
+    # ... and no message_delta event may be emitted for the failed stream.
+    assert "event: message_delta" not in body
+
+
+# ------------------------------------------------------------
+# Auth error
+# ------------------------------------------------------------
 
 
 def test_anthropic_messages_auth_failure(anthropic_client):
Original file line number	Diff line number	Diff line change
`@@ -248,6 +248,14 @@ def _extract_terminal_error_status(chunk: Any) -> int \| None:`
`248`	`248`	`default_unknown_error_status=502,`
`249`	`249`	`)`
`250`	`250`	`)`
	`251`	`+ payload_details = content.get("details")`
	`252`	`+ if payload_error and isinstance(payload_details, dict):`
	`253`	`+ return (`
	`254`	`+ BackendStreamingResponseHandler._status_from_stream_error_payload(`
	`255`	`+ payload_details,`
	`256`	`+ default_unknown_error_status=502,`
	`257`	`+ )`
	`258`	`+ )`
`251`	`259`
`252`	`260`	`choices = content.get("choices")`
`253`	`261`	`if isinstance(choices, list):`
Original file line number	Diff line number	Diff line change
`@@ -305,6 +305,9 @@ def to_domain_stream_chunk(`
`305`	`305`	`for c in choices_val`
`306`	`306`	`]`
`307`	`307`
	`308`	`+ if "error" in result:`
	`309`	`+ return result`
	`310`	`+`
`308`	`311`	`if logger.isEnabledFor(TRACE_LEVEL):`
`309`	`312`	`result_type = type(result).__name__`
`310`	`313`	`result_keys = list(result.keys())`