Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions containers/api-proxy/token-tracker.js
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ function createDecompressor(headers) {
* Extract token usage from a non-streaming JSON response body.
*
* Supports:
* - OpenAI/Copilot: { usage: { prompt_tokens, completion_tokens, total_tokens } }
* - OpenAI/Copilot: { usage: { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details: { cached_tokens } } }
* - Anthropic: { usage: { input_tokens, output_tokens, cache_creation_input_tokens, cache_read_input_tokens } }
*
* Also extracts the model field if present.
Expand Down Expand Up @@ -180,6 +180,11 @@ function extractUsageFromJson(body) {
usage.total_tokens = json.usage.total_tokens;
hasField = true;
}
// OpenAI/Copilot nested cache fields (prompt_tokens_details.cached_tokens)
if (json.usage.prompt_tokens_details && typeof json.usage.prompt_tokens_details.cached_tokens === 'number') {
usage.cache_read_input_tokens = json.usage.prompt_tokens_details.cached_tokens;
hasField = true;
}
Comment on lines +183 to +187

Copilot AI Apr 2, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

normalizeUsage()’s JSDoc (later in this file) still states cache_read_tokens/cache_write_tokens are “Anthropic only, 0 for others”, but this change now populates cache_read_input_tokens from OpenAI/Copilot prompt_tokens_details.cached_tokens, which will result in non-zero cache_read_tokens for OpenAI/Copilot too. Please update the normalization doc comment to reflect the new supported source(s) so future readers don’t assume cache fields are Anthropic-only.

Copilot uses AI. Check for mistakes.
if (hasField) {
result.usage = usage;
}
Expand All @@ -201,7 +206,7 @@ function extractUsageFromJson(body) {
* - message_delta: { type: "message_delta", usage: { output_tokens } }
*
* OpenAI/Copilot streaming events with usage:
* - Final chunk: { usage: { prompt_tokens, completion_tokens, total_tokens } }
* - Final chunk: { usage: { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details: { cached_tokens } } }
*
* @param {string} line - A single SSE data line (without "data: " prefix)
* @returns {{ usage: object|null, model: string|null }}
Expand Down Expand Up @@ -237,6 +242,10 @@ function extractUsageFromSseLine(line) {
if (typeof json.usage.prompt_tokens === 'number') result.usage.prompt_tokens = json.usage.prompt_tokens;
if (typeof json.usage.completion_tokens === 'number') result.usage.completion_tokens = json.usage.completion_tokens;
if (typeof json.usage.total_tokens === 'number') result.usage.total_tokens = json.usage.total_tokens;
// OpenAI/Copilot nested cache fields (prompt_tokens_details.cached_tokens)
if (json.usage.prompt_tokens_details && typeof json.usage.prompt_tokens_details.cached_tokens === 'number') {
result.usage.cache_read_input_tokens = json.usage.prompt_tokens_details.cached_tokens;
}
return result;
}

Expand Down
83 changes: 83 additions & 0 deletions containers/api-proxy/token-tracker.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,50 @@ describe('extractUsageFromJson', () => {
const result = extractUsageFromJson(body);
expect(result.usage).toEqual({ completion_tokens: 50 });
});

// Non-streaming JSON body: the nested prompt_tokens_details.cached_tokens value
// is expected back as cache_read_input_tokens, next to the flat usage counters.
test('extracts OpenAI prompt_tokens_details.cached_tokens', () => {
  const payload = {
    id: 'chatcmpl-456',
    model: 'claude-sonnet-4.6',
    usage: {
      prompt_tokens: 41344,
      completion_tokens: 256,
      total_tokens: 41600,
      prompt_tokens_details: { cached_tokens: 36500 },
    },
  };
  const rawBody = Buffer.from(JSON.stringify(payload));

  const parsed = extractUsageFromJson(rawBody);

  expect(parsed.model).toBe('claude-sonnet-4.6');
  // The nested details object itself is not part of the expected usage
  const expectedUsage = {
    prompt_tokens: 41344,
    completion_tokens: 256,
    total_tokens: 41600,
    cache_read_input_tokens: 36500,
  };
  expect(parsed.usage).toEqual(expectedUsage);
});

// Baseline case: with no prompt_tokens_details in the response, only the flat
// counters are expected and no cache field may appear.
test('handles OpenAI usage without prompt_tokens_details', () => {
  const payload = {
    model: 'gpt-4o',
    usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 },
  };
  const rawBody = Buffer.from(JSON.stringify(payload));

  const parsed = extractUsageFromJson(rawBody);

  const expectedUsage = { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 };
  expect(parsed.usage).toEqual(expectedUsage);
  // Explicitly confirm that cache_read_input_tokens was not set
  expect(parsed.usage.cache_read_input_tokens).toBeUndefined();
});
});

// ── extractUsageFromSseLine ───────────────────────────────────────────
Expand Down Expand Up @@ -187,6 +231,30 @@ describe('extractUsageFromSseLine', () => {
const result = extractUsageFromSseLine('invalid json');
expect(result.usage).toBeNull();
});

// Streaming case: a final chunk carrying usage with nested cached_tokens is
// expected to yield cache_read_input_tokens together with the flat counters.
test('extracts OpenAI prompt_tokens_details.cached_tokens from streaming final chunk', () => {
  const finalChunk = {
    model: 'claude-sonnet-4.6',
    choices: [{ finish_reason: 'stop' }],
    usage: {
      prompt_tokens: 43977,
      completion_tokens: 24,
      total_tokens: 44001,
      prompt_tokens_details: { cached_tokens: 43894 },
    },
  };
  const sseLine = JSON.stringify(finalChunk);

  const parsed = extractUsageFromSseLine(sseLine);

  expect(parsed.model).toBe('claude-sonnet-4.6');
  const expectedUsage = {
    prompt_tokens: 43977,
    completion_tokens: 24,
    total_tokens: 44001,
    cache_read_input_tokens: 43894,
  };
  expect(parsed.usage).toEqual(expectedUsage);
});
});

// ── parseSseDataLines ─────────────────────────────────────────────────
Expand Down Expand Up @@ -283,6 +351,21 @@ describe('normalizeUsage', () => {
expect(result.input_tokens).toBe(200);
expect(result.output_tokens).toBe(80);
});

// OpenAI-style counters plus an already-mapped cache_read_input_tokens value
// are expected to normalize into the four-field shape asserted below.
test('normalizes OpenAI cache tokens via cache_read_input_tokens mapping', () => {
  const extractedUsage = {
    prompt_tokens: 43977,
    completion_tokens: 24,
    total_tokens: 44001,
    cache_read_input_tokens: 43894,
  };

  const normalized = normalizeUsage(extractedUsage);

  // total_tokens is supplied above but is not part of the expected result
  const expectedShape = {
    input_tokens: 43977,
    output_tokens: 24,
    cache_read_tokens: 43894,
    cache_write_tokens: 0,
  };
  expect(normalized).toEqual(expectedShape);
});
});

// ── isStreamingResponse ───────────────────────────────────────────────
Expand Down
Loading