@@ -287,7 +287,7 @@ describe('OpenAILegacyChatProvider', () => {
287287 ] ) ;
288288 } ) ;
289289
290- it ( 'tool call with image result flattens to text to satisfy API constraints ' , async ( ) => {
290+ it ( 'tool call with image result keeps the tool result textual and reattaches images as user input ' , async ( ) => {
291291 // OpenAI Chat Completions `tool` messages only accept text content.
292292 // Even when toolMessageConversion is unset, a tool result containing
293293 // image_url / audio_url / video_url parts must not be serialized as a
@@ -319,15 +319,86 @@ describe('OpenAILegacyChatProvider', () => {
319319 ] ;
320320 const body = await captureRequestBody ( provider , '' , [ ] , history ) ;
321321
322- const toolMsg = ( body [ 'messages' ] as Record < string , unknown > [ ] ) [ 2 ] ! ;
322+ const messages = body [ 'messages' ] as Record < string , unknown > [ ] ;
323+ const toolMsg = messages [ 2 ] ! ;
323324 expect ( toolMsg [ 'role' ] ) . toBe ( 'tool' ) ;
324325 expect ( toolMsg [ 'tool_call_id' ] ) . toBe ( 'call_abc123' ) ;
325326 // Content must be a plain string, not a content-part array.
326327 expect ( typeof toolMsg [ 'content' ] ) . toBe ( 'string' ) ;
327328 // The text segment must survive; the image must not appear as a
328- // structured image_url part anywhere in the serialized content .
329+ // structured image_url part inside the tool message .
329330 expect ( toolMsg [ 'content' ] ) . toContain ( '5' ) ;
330331 expect ( Array . isArray ( toolMsg [ 'content' ] ) ) . toBe ( false ) ;
332+ expect ( messages [ 3 ] ) . toEqual ( {
333+ role : 'user' ,
334+ content : [
335+ { type : 'text' , text : 'Attached image(s) from tool result:' } ,
336+ { type : 'image_url' , image_url : { url : 'https://example.com/image.png' } } ,
337+ ] ,
338+ } ) ;
339+ } ) ;
340+
it('groups consecutive tool result images after all matching tool messages', async () => {
  // Scenario: one assistant turn issues two tool calls and both tool results
  // carry an image. The assertions below expect each `tool` message to be
  // serialized as plain text, and every image to be re-attached in a single
  // `user` message placed after the last of the consecutive tool messages.
  const firstCallId = 'call_first';
  const secondCallId = 'call_second';
  const firstImageUrl = 'https://example.com/first.png';
  const secondImageUrl = 'https://example.com/second.png';

  const conversation: Message[] = [
    { role: 'user', content: [{ type: 'text', text: 'Fetch both images' }], toolCalls: [] },
    {
      role: 'assistant',
      content: [{ type: 'text', text: 'ok' }],
      toolCalls: [
        { type: 'function', id: firstCallId, name: 'first_image', arguments: '{}' },
        { type: 'function', id: secondCallId, name: 'second_image', arguments: '{}' },
      ],
    },
    // First result is image-only: there is no text part to keep.
    {
      role: 'tool',
      content: [{ type: 'image_url', imageUrl: { url: firstImageUrl } }],
      toolCallId: firstCallId,
      toolCalls: [],
    },
    // Second result mixes a text part with an image part.
    {
      role: 'tool',
      content: [
        { type: 'text', text: 'second' },
        { type: 'image_url', imageUrl: { url: secondImageUrl } },
      ],
      toolCallId: secondCallId,
      toolCalls: [],
    },
  ];

  const requestBody = await captureRequestBody(createProvider(), '', [], conversation);

  const expectedAssistantTurn = {
    role: 'assistant',
    content: 'ok',
    tool_calls: [
      {
        type: 'function',
        id: firstCallId,
        function: { name: 'first_image', arguments: '{}' },
      },
      {
        type: 'function',
        id: secondCallId,
        function: { name: 'second_image', arguments: '{}' },
      },
    ],
  };

  // The image-only result is expected to carry a textual placeholder, while
  // the mixed result is expected to keep just its text.
  const expectedToolMessages = [
    { role: 'tool', content: '(see attached image)', tool_call_id: firstCallId },
    { role: 'tool', content: 'second', tool_call_id: secondCallId },
  ];

  // Both images are expected in one trailing user message, in tool-call order.
  const expectedImageCarrier = {
    role: 'user',
    content: [
      { type: 'text', text: 'Attached image(s) from tool result:' },
      { type: 'image_url', image_url: { url: firstImageUrl } },
      { type: 'image_url', image_url: { url: secondImageUrl } },
    ],
  };

  expect(requestBody['messages']).toEqual([
    { role: 'user', content: 'Fetch both images' },
    expectedAssistantTurn,
    ...expectedToolMessages,
    expectedImageCarrier,
  ]);
});
332403
333404 it ( 'parallel tool calls' , async ( ) => {
0 commit comments