Fix firewall token usage reporting to preserve raw counts and remove cache-rate transforms (#31581)

Copilot · pelikhan · web-flow · commit 21926a00deb8 · 2026-05-11T15:10:19.000-07:00
* Initial plan

* Plan: fix token usage cache accounting

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>

* Report raw token usage without cache-rate transforms

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
Co-authored-by: Peli de Halleux <pelikhan@users.noreply.github.com>
diff --git a/.github/workflows/issue-arborist.lock.yml b/.github/workflows/issue-arborist.lock.yml
diff --git a/actions/setup/js/parse_mcp_gateway_log.cjs b/actions/setup/js/parse_mcp_gateway_log.cjs
@@ -50,7 +50,7 @@ function formatDurationMs(ms) {
  * Parses token-usage.jsonl content and returns an aggregated summary.
  * Computes effective tokens (ET) per model using the GH_AW_MODEL_MULTIPLIERS env var.
  * @param {string} jsonlContent - The token-usage.jsonl file content
- * @returns {{totalInputTokens: number, totalOutputTokens: number, totalCacheReadTokens: number, totalCacheWriteTokens: number, totalRequests: number, totalDurationMs: number, cacheEfficiency: number, totalEffectiveTokens: number, byModel: Object} | null}
+ * @returns {{totalInputTokens: number, totalOutputTokens: number, totalCacheReadTokens: number, totalCacheWriteTokens: number, totalRequests: number, totalDurationMs: number, totalEffectiveTokens: number, byModel: Object} | null}
  */
 function parseTokenUsageJsonl(jsonlContent) {
   const summary = {
@@ -60,7 +60,6 @@ function parseTokenUsageJsonl(jsonlContent) {
     totalCacheWriteTokens: 0,
     totalRequests: 0,
     totalDurationMs: 0,
-    cacheEfficiency: 0,
     totalEffectiveTokens: 0,
     byModel: {},
   };
@@ -110,11 +109,6 @@ function parseTokenUsageJsonl(jsonlContent) {
 
   if (summary.totalRequests === 0) return null;
 
-  const totalInputPlusCacheRead = summary.totalInputTokens + summary.totalCacheReadTokens;
-  if (totalInputPlusCacheRead > 0) {
-    summary.cacheEfficiency = summary.totalCacheReadTokens / totalInputPlusCacheRead;
-  }
-
   // Compute effective tokens per model and aggregate total
   let totalEffectiveTokens = 0;
   for (const [model, usage] of Object.entries(summary.byModel)) {
@@ -130,7 +124,7 @@ function parseTokenUsageJsonl(jsonlContent) {
 /**
  * Generates a markdown summary section for token usage data.
  * Includes an Effective Tokens (ET) column per model and a ● ET summary line.
- * @param {{totalInputTokens: number, totalOutputTokens: number, totalCacheReadTokens: number, totalCacheWriteTokens: number, totalRequests: number, totalDurationMs: number, cacheEfficiency: number, totalEffectiveTokens: number, byModel: Object} | null} summary
+ * @param {{totalInputTokens: number, totalOutputTokens: number, totalCacheReadTokens: number, totalCacheWriteTokens: number, totalRequests: number, totalDurationMs: number, totalEffectiveTokens: number, byModel: Object} | null} summary
  * @returns {string} Markdown section, or empty string if no data
  */
 function generateTokenUsageSummary(summary) {
@@ -159,14 +153,11 @@ function generateTokenUsageSummary(summary) {
     `| **Total** | **${summary.totalInputTokens.toLocaleString()}** | **${summary.totalOutputTokens.toLocaleString()}** | **${summary.totalCacheReadTokens.toLocaleString()}** | **${summary.totalCacheWriteTokens.toLocaleString()}** | **${totalET}** | **${summary.totalRequests}** | **${formatDurationMs(summary.totalDurationMs)}** |`
   );
 
-  // Footer line with ET summary using ● symbol and optional cache efficiency
+  // Footer line with ET summary using ● symbol
   const footerParts = [];
   if (summary.totalEffectiveTokens > 0) {
     footerParts.push(`● ${formatET(Math.round(summary.totalEffectiveTokens))}`);
   }
-  if (summary.cacheEfficiency > 0) {
-    footerParts.push(`Cache efficiency: ${(summary.cacheEfficiency * 100).toFixed(1)}%`);
-  }
   if (footerParts.length > 0) {
     lines.push(`\n_${footerParts.join(" · ")}_`);
     // Disclose the token class weights used to compute ET (required by the ET spec)
diff --git a/actions/setup/js/parse_mcp_gateway_log.test.cjs b/actions/setup/js/parse_mcp_gateway_log.test.cjs
@@ -1092,12 +1092,11 @@ not-json
       expect(summary.byModel["unknown"]).toBeDefined();
     });
 
-    test("computes cache efficiency", () => {
+    test("does not compute cache efficiency", () => {
       const content = JSON.stringify({ model: "m", input_tokens: 100, output_tokens: 10, cache_read_tokens: 900, cache_write_tokens: 0, duration_ms: 100 });
       const summary = parseTokenUsageJsonl(content);
       expect(summary).not.toBeNull();
-      // cache_read / (input + cache_read) = 900 / 1000 = 0.9
-      expect(summary.cacheEfficiency).toBeCloseTo(0.9);
+      expect(summary).not.toHaveProperty("cacheEfficiency");
     });
   });
 
@@ -1120,17 +1119,10 @@ not-json
       expect(md).toContain("**Total**");
     });
 
-    test("includes cache efficiency when non-zero", () => {
+    test("does not include cache efficiency", () => {
       const content = JSON.stringify({ model: "m", input_tokens: 100, output_tokens: 10, cache_read_tokens: 900, cache_write_tokens: 0, duration_ms: 100 });
       const summary = parseTokenUsageJsonl(content);
       const md = generateTokenUsageSummary(summary);
-      expect(md).toContain("Cache efficiency: 90.0%");
-    });
-
-    test("omits cache efficiency line when zero", () => {
-      const content = JSON.stringify({ model: "m", input_tokens: 100, output_tokens: 10, cache_read_tokens: 0, cache_write_tokens: 0, duration_ms: 100 });
-      const summary = parseTokenUsageJsonl(content);
-      const md = generateTokenUsageSummary(summary);
       expect(md).not.toContain("Cache efficiency");
     });
 
@@ -1163,16 +1155,12 @@ not-json
       expect(md).toContain("●");
     });
 
-    test("includes cache efficiency after ● ET in footer line", () => {
+    test("includes ● ET in footer line without cache efficiency", () => {
       const content = JSON.stringify({ model: "m", input_tokens: 100, output_tokens: 10, cache_read_tokens: 900, cache_write_tokens: 0, duration_ms: 100 });
       const summary = parseTokenUsageJsonl(content);
       const md = generateTokenUsageSummary(summary);
       expect(md).toContain("●");
-      expect(md).toContain("Cache efficiency: 90.0%");
-      // ET should appear before cache efficiency
-      const etIdx = md.indexOf("●");
-      const ceIdx = md.indexOf("Cache efficiency");
-      expect(etIdx).toBeLessThan(ceIdx);
+      expect(md).not.toContain("Cache efficiency");
     });
   });
 
diff --git a/pkg/cli/audit_report_render_findings.go b/pkg/cli/audit_report_render_findings.go
@@ -170,19 +170,13 @@ func renderSafeOutputSummary(summary *SafeOutputSummary) {
 
 // renderTokenUsage displays token usage data from the firewall proxy
 func renderTokenUsage(summary *TokenUsageSummary) {
-	totalTokens := summary.TotalTokens()
-	cacheTokens := summary.TotalCacheReadTokens + summary.TotalCacheWriteTokens
-
-	fmt.Fprintf(os.Stderr, "  Total:      %s tokens (%s input, %s output, %s cache)\n",
-		console.FormatNumber(totalTokens),
+	fmt.Fprintf(os.Stderr, "  Tokens:     %s input, %s output, %s cache read, %s cache write\n",
 		console.FormatNumber(summary.TotalInputTokens),
 		console.FormatNumber(summary.TotalOutputTokens),
-		console.FormatNumber(cacheTokens))
+		console.FormatNumber(summary.TotalCacheReadTokens),
+		console.FormatNumber(summary.TotalCacheWriteTokens))
 	fmt.Fprintf(os.Stderr, "  Requests:   %d (avg %s)\n",
 		summary.TotalRequests, timeutil.FormatDurationMs(summary.AvgDurationMs()))
-	if summary.CacheEfficiency > 0 {
-		fmt.Fprintf(os.Stderr, "  Cache hit:  %.1f%%\n", summary.CacheEfficiency*100)
-	}
 	fmt.Fprintln(os.Stderr)
 
 	rows := summary.ModelRows()
diff --git a/pkg/cli/audit_report_test.go b/pkg/cli/audit_report_test.go
@@ -1045,6 +1045,39 @@ func TestToolUsageAggregation(t *testing.T) {
 		"Bash should be present in tool usage")
 }
 
+func TestRenderTokenUsageDisplaysRawCountsOnly(t *testing.T) {
+	summary := &TokenUsageSummary{
+		TotalInputTokens:      100,
+		TotalOutputTokens:     200,
+		TotalCacheReadTokens:  5000,
+		TotalCacheWriteTokens: 3000,
+		TotalRequests:         2,
+		TotalDurationMs:       3000,
+		ByModel:               map[string]*ModelTokenUsage{},
+	}
+
+	oldStderr := os.Stderr
+	r, w, err := os.Pipe()
+	require.NoError(t, err)
+	os.Stderr = w
+
+	renderTokenUsage(summary)
+	require.NoError(t, w.Close())
+	os.Stderr = oldStderr
+
+	var buf bytes.Buffer
+	_, copyErr := io.Copy(&buf, r)
+	require.NoError(t, copyErr)
+
+	output := buf.String()
+	assert.Contains(t, output, "Tokens:")
+	assert.Contains(t, output, "100 input")
+	assert.Contains(t, output, "cache read")
+	assert.Contains(t, output, "cache write")
+	assert.NotContains(t, output, "Total:")
+	assert.NotContains(t, output, "Cache hit:")
+}
+
 func TestExtractDownloadedFilesEmpty(t *testing.T) {
 	// Test with nonexistent directory
 	files := extractDownloadedFiles("/nonexistent/path")
diff --git a/pkg/cli/token_usage.go b/pkg/cli/token_usage.go
@@ -168,12 +168,6 @@ func parseTokenUsageFile(filePath string, customWeights *types.TokenWeights) (*T
 		m.ResponseBytes += entry.ResponseBytes
 	}
 
-	// Compute cache efficiency: cache_read / (input + cache_read)
-	totalInputPlusCacheRead := summary.TotalInputTokens + summary.TotalCacheReadTokens
-	if totalInputPlusCacheRead > 0 {
-		summary.CacheEfficiency = float64(summary.TotalCacheReadTokens) / float64(totalInputPlusCacheRead)
-	}
-
 	tokenUsageLog.Printf("Parsed %d entries: %d input, %d output, %d cache_read, %d cache_write, %d requests",
 		lineNum, summary.TotalInputTokens, summary.TotalOutputTokens,
 		summary.TotalCacheReadTokens, summary.TotalCacheWriteTokens, summary.TotalRequests)
@@ -349,11 +343,6 @@ func parseAgentUsageFile(filePath string, customWeights *types.TokenWeights) (*T
 		ByModel:               make(map[string]*ModelTokenUsage),
 	}
 
-	totalInputPlusCacheRead := summary.TotalInputTokens + summary.TotalCacheReadTokens
-	if totalInputPlusCacheRead > 0 {
-		summary.CacheEfficiency = float64(summary.TotalCacheReadTokens) / float64(totalInputPlusCacheRead)
-	}
-
 	hasTokenData := summary.TotalInputTokens > 0 ||
 		summary.TotalOutputTokens > 0 ||
 		summary.TotalCacheReadTokens > 0 ||
diff --git a/pkg/cli/token_usage_test.go b/pkg/cli/token_usage_test.go
@@ -66,9 +66,7 @@ func TestParseTokenUsageFile(t *testing.T) {
 		assert.Equal(t, 2, summary.ByModel["claude-sonnet-4-6"].Requests, "sonnet requests")
 		assert.Equal(t, 1, summary.ByModel["claude-haiku-4-5"].Requests, "haiku requests")
 
-		// Check cache efficiency
-		expectedEfficiency := float64(55028) / float64(775+55028)
-		assert.InDelta(t, expectedEfficiency, summary.CacheEfficiency, 0.001, "cache efficiency")
+		assert.InDelta(t, 0.0, summary.CacheEfficiency, 0.001, "cache efficiency is not computed from raw token counts")
 	})
 
 	t.Run("extracts ambient context from first chronological invocation", func(t *testing.T) {
@@ -327,19 +325,7 @@ func TestAnalyzeTokenUsage(t *testing.T) {
 }
 
 func TestCacheEfficiency(t *testing.T) {
-	t.Run("zero when no cache reads", func(t *testing.T) {
-		tmpDir := testutil.TempDir(t, "cache-eff")
-		filePath := filepath.Join(tmpDir, "token-usage.jsonl")
-		content := `{"provider":"anthropic","model":"sonnet","input_tokens":100,"output_tokens":50,"cache_read_tokens":0,"cache_write_tokens":0,"duration_ms":100}`
-		require.NoError(t, os.WriteFile(filePath, []byte(content+"\n"), 0o644))
-
-		summary, err := parseTokenUsageFile(filePath, nil)
-		require.NoError(t, err)
-		require.NotNil(t, summary)
-		assert.InDelta(t, 0.0, summary.CacheEfficiency, 0.001, "cache efficiency should be 0 with no cache reads")
-	})
-
-	t.Run("high efficiency with mostly cache reads", func(t *testing.T) {
+	t.Run("remains zero to avoid transforming raw token counts", func(t *testing.T) {
 		tmpDir := testutil.TempDir(t, "cache-eff")
 		filePath := filepath.Join(tmpDir, "token-usage.jsonl")
 		content := `{"provider":"anthropic","model":"sonnet","input_tokens":100,"output_tokens":50,"cache_read_tokens":9900,"cache_write_tokens":0,"duration_ms":100}`
@@ -348,6 +334,6 @@ func TestCacheEfficiency(t *testing.T) {
 		summary, err := parseTokenUsageFile(filePath, nil)
 		require.NoError(t, err)
 		require.NotNil(t, summary)
-		assert.InDelta(t, 0.99, summary.CacheEfficiency, 0.001, "cache efficiency should be ~99%")
+		assert.InDelta(t, 0.0, summary.CacheEfficiency, 0.001, "cache efficiency should remain unset")
 	})
 }