Skip to content

Commit 4fa2ff4

Browse files
authored
Reduce amount of BASE64 added to requests by replacing images with placeholders (#308901)
1 parent 8c15ca4 commit 4fa2ff4

File tree

2 files changed

+251
-16
lines changed

2 files changed

+251
-16
lines changed

extensions/copilot/src/extension/prompts/node/panel/test/toolCalling.spec.ts

Lines changed: 137 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,14 @@
66
import { describe, expect, test } from 'vitest';
77
import type * as vscode from 'vscode';
88
import { IChatHookService, type IPreToolUseHookResult } from '../../../../../platform/chat/common/chatHookService';
9+
import { ConfigKey, IConfigurationService } from '../../../../../platform/configuration/common/configurationService';
910
import { IEndpointProvider } from '../../../../../platform/endpoint/common/endpointProvider';
1011
import { DeferredPromise } from '../../../../../util/vs/base/common/async';
1112
import { CancellationToken } from '../../../../../util/vs/base/common/cancellation';
1213
import { Event } from '../../../../../util/vs/base/common/event';
1314
import { constObservable } from '../../../../../util/vs/base/common/observable';
1415
import { IInstantiationService } from '../../../../../util/vs/platform/instantiation/common/instantiation';
15-
import { LanguageModelTextPart, LanguageModelToolResult } from '../../../../../vscodeTypes';
16+
import { LanguageModelDataPart, LanguageModelTextPart, LanguageModelToolResult } from '../../../../../vscodeTypes';
1617
import { ChatVariablesCollection } from '../../../../prompt/common/chatVariablesCollection';
1718
import type { Conversation } from '../../../../prompt/common/conversation';
1819
import type { IBuildPromptContext, IToolCallRound } from '../../../../prompt/common/intents';
@@ -450,4 +451,139 @@ describe('ChatToolCalls (toolCalling.tsx)', () => {
450451
expect(contentText).toContain(denyContext);
451452
expect(contentText).not.toContain('<PostToolUse-context>');
452453
});
454+
455+
test('replaces images with placeholders for historical turns', async () => {
// Renders ChatToolCalls with `isHistorical: true` for one tool call whose result holds a
// text part plus an image data part, then checks the serialized messages keep the text,
// contain the placeholder wording, and do not contain the string 'image_url'.
456+
const toolName = 'viewImage';
457+
const toolCallId = 'call-img-1';
458+
459+
const toolInfo: vscode.LanguageModelToolInformation = {
460+
name: toolName,
461+
description: 'view image tool',
462+
source: undefined,
463+
inputSchema: undefined,
464+
tags: [],
465+
};
466+
// Register a CapturingToolsService built from toolInfo as the IToolsService before the
// accessor is created.
467+
const testingServiceCollection = createExtensionUnitTestingServices();
468+
const toolsService = new CapturingToolsService(toolInfo);
469+
testingServiceCollection.define(IToolsService, toolsService);
470+
471+
const accessor = testingServiceCollection.createTestingAccessor();
472+
const instantiationService = accessor.get(IInstantiationService);
473+
const endpointProvider = accessor.get(IEndpointProvider);
474+
const endpoint = await endpointProvider.getChatEndpoint('copilot-base');
475+
// 1024 zero-filled bytes labelled as 'image/png'; this is stand-in data, not a decodable PNG.
476+
const imageData = new Uint8Array(1024);
// Results are keyed by tool call id; the same id is referenced by the round's toolCalls below.
477+
const toolCallResults: Record<string, vscode.LanguageModelToolResult> = {
478+
[toolCallId]: new LanguageModelToolResult([
479+
new LanguageModelTextPart('some text result'),
480+
LanguageModelDataPart.image(imageData, 'image/png'),
481+
]),
482+
};
483+
484+
const round: IToolCallRound = {
485+
id: 'round-1',
486+
response: 'viewing image',
487+
toolInputRetry: 0,
488+
toolCalls: [{ name: toolName, arguments: '{}', id: toolCallId }],
489+
};
490+
// Minimal prompt context: empty history, and `request` / `toolInvocationToken` are empty
// objects cast to their vscode types.
491+
const promptContext: IBuildPromptContext = {
492+
query: 'test',
493+
history: [],
494+
chatVariables: new ChatVariablesCollection(),
495+
conversation: { sessionId: 'session-img' } as unknown as Conversation,
496+
request: {} as vscode.ChatRequest,
497+
tools: {
498+
toolReferences: [],
499+
toolInvocationToken: {} as vscode.ChatParticipantToolToken,
500+
availableTools: [toolInfo],
501+
},
502+
};
503+
// NOTE(review): `isHistorical: true` is presumably what triggers the placeholder
// substitution under test; the component implementation is not visible here, so confirm.
504+
const { messages } = await renderPromptElement(instantiationService, endpoint, ChatToolCalls, {
505+
promptContext,
506+
toolCallRounds: [round],
507+
toolCallResults,
508+
isHistorical: true,
509+
});
510+
// Serialize the whole message list so the substring assertions search every nested part.
511+
const serialized = JSON.stringify(messages);
512+
expect(serialized).toContain('Image was previously shown to you');
513+
expect(serialized).toContain('some text result');
514+
// Should not contain base64 image data
// NOTE(review): this relies on 'image_url' being the marker for inline image parts in the
// rendered messages; verify against the renderer's output format.
515+
expect(serialized).not.toContain('image_url');
516+
});
517+
518+
test('enforces shared image budget across tool results', async () => {
// Renders ChatToolCalls for one round with two tool calls, each returning a single
// 3 * 1024 * 1024-byte image part, with the EnableChatImageUpload config set to false.
// Checks the serialized messages contain the budget-exceeded wording and no 'image_url'.
519+
const toolName = 'viewImage';
520+
const firstCallId = 'call-big-1';
521+
const secondCallId = 'call-big-2';
522+
523+
const toolInfo: vscode.LanguageModelToolInformation = {
524+
name: toolName,
525+
description: 'view image tool',
526+
source: undefined,
527+
inputSchema: undefined,
528+
tags: [],
529+
};
530+
// Register a CapturingToolsService built from toolInfo as the IToolsService before the
// accessor is created.
531+
const testingServiceCollection = createExtensionUnitTestingServices();
532+
const toolsService = new CapturingToolsService(toolInfo);
533+
testingServiceCollection.define(IToolsService, toolsService);
534+
535+
const accessor = testingServiceCollection.createTestingAccessor();
536+
const instantiationService = accessor.get(IInstantiationService);
537+
const endpointProvider = accessor.get(IEndpointProvider);
538+
const endpoint = await endpointProvider.getChatEndpoint('copilot-base');
539+
540+
// Disable image uploads so images go through the base64 path where the budget applies
541+
const configService = accessor.get(IConfigurationService);
542+
await configService.setConfig(ConfigKey.EnableChatImageUpload, false);
543+
544+
// Each image is 3MB — individually exceeds the 2.5MB shared budget (half of 5MB CAPI limit)
// The same zero-filled buffer backs both image parts below.
545+
const bigImage = new Uint8Array(3 * 1024 * 1024);
546+
const toolCallResults: Record<string, vscode.LanguageModelToolResult> = {
547+
[firstCallId]: new LanguageModelToolResult([
548+
LanguageModelDataPart.image(bigImage, 'image/png'),
549+
]),
550+
[secondCallId]: new LanguageModelToolResult([
551+
LanguageModelDataPart.image(bigImage, 'image/png'),
552+
]),
553+
};
554+
// A single round issues both tool calls; their ids match the keys of toolCallResults.
555+
const round: IToolCallRound = {
556+
id: 'round-1',
557+
response: 'viewing images',
558+
toolInputRetry: 0,
559+
toolCalls: [
560+
{ name: toolName, arguments: '{}', id: firstCallId },
561+
{ name: toolName, arguments: '{}', id: secondCallId },
562+
],
563+
};
564+
// Minimal prompt context: empty history, and `request` / `toolInvocationToken` are empty
// objects cast to their vscode types.
565+
const promptContext: IBuildPromptContext = {
566+
query: 'test',
567+
history: [],
568+
chatVariables: new ChatVariablesCollection(),
569+
conversation: { sessionId: 'session-budget' } as unknown as Conversation,
570+
request: {} as vscode.ChatRequest,
571+
tools: {
572+
toolReferences: [],
573+
toolInvocationToken: {} as vscode.ChatParticipantToolToken,
574+
availableTools: [toolInfo],
575+
},
576+
};
577+
// No `isHistorical` prop is passed in this render call.
578+
const { messages } = await renderPromptElement(instantiationService, endpoint, ChatToolCalls, {
579+
promptContext,
580+
toolCallRounds: [round],
581+
toolCallResults,
582+
});
583+
// Serialize the whole message list so the substring assertions search every nested part.
584+
const serialized = JSON.stringify(messages);
585+
// Both images exceed the 2.5MB shared budget and should be replaced with placeholders
// NOTE(review): the assertions below only require the phrase to appear at least once and
// 'image_url' to be absent; they do not count one placeholder per image.
586+
expect(serialized).toContain('context image budget exceeded');
587+
expect(serialized).not.toContain('image_url');
588+
});
453589
});

0 commit comments

Comments
 (0)