Skip to content

Commit d6ae06d

Browse files
authored
fix(otel): eliminate gen_ai.usage.* double-counting and gen_ai.request.model duplicate on agent span (#30350)
1 parent e8eec7c commit d6ae06d

2 files changed

Lines changed: 66 additions & 20 deletions

File tree

actions/setup/js/send_otlp_span.cjs

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,37 +1252,44 @@ async function sendJobConclusionSpan(spanName, options = {}) {
12521252
}
12531253
}
12541254

1255-
// Read agent token-usage counters and add the per-category breakdown to the
1256-
// conclusion span so a single query is sufficient for observability (no join
1257-
// to the child agent span required).
1255+
// Read agent token-usage counters and build per-category breakdown attributes.
1256+
// These are attached exclusively to the dedicated agent span (when one is emitted)
1257+
// to avoid double-counting in backends that sum gen_ai.usage.* across all spans.
1258+
// When no agent span is emitted the attributes fall through to the conclusion span
1259+
// so a single query is still sufficient for observability.
12581260
const agentUsage = readJSONIfExists("/tmp/gh-aw/agent_usage.json") || {};
1261+
const usageAttrs = [];
12591262
if (typeof agentUsage.input_tokens === "number" && agentUsage.input_tokens > 0) {
1260-
attributes.push(buildAttr("gen_ai.usage.input_tokens", agentUsage.input_tokens));
1263+
usageAttrs.push(buildAttr("gen_ai.usage.input_tokens", agentUsage.input_tokens));
12611264
}
12621265
if (typeof agentUsage.output_tokens === "number" && agentUsage.output_tokens > 0) {
1263-
attributes.push(buildAttr("gen_ai.usage.output_tokens", agentUsage.output_tokens));
1266+
usageAttrs.push(buildAttr("gen_ai.usage.output_tokens", agentUsage.output_tokens));
12641267
}
12651268
if (typeof agentUsage.cache_read_tokens === "number" && agentUsage.cache_read_tokens > 0) {
1266-
attributes.push(buildAttr("gen_ai.usage.cache_read.input_tokens", agentUsage.cache_read_tokens));
1269+
usageAttrs.push(buildAttr("gen_ai.usage.cache_read.input_tokens", agentUsage.cache_read_tokens));
12671270
}
12681271
if (typeof agentUsage.cache_write_tokens === "number" && agentUsage.cache_write_tokens > 0) {
1269-
attributes.push(buildAttr("gen_ai.usage.cache_creation.input_tokens", agentUsage.cache_write_tokens));
1272+
usageAttrs.push(buildAttr("gen_ai.usage.cache_creation.input_tokens", agentUsage.cache_write_tokens));
12701273
}
12711274

12721275
const endpoints = parseOTLPEndpoints();
12731276
const conclusionSpanId = generateSpanId();
1277+
let agentSpanEmitted = false;
12741278
if (jobName === "agent" && typeof agentStartMs === "number" && agentStartMs > 0 && typeof agentEndMs === "number" && agentEndMs > agentStartMs) {
1279+
agentSpanEmitted = true;
12751280
const agentSpanEvents = buildSpanEvents(agentEndMs);
12761281

12771282
// Build OTel GenAI semantic convention attributes for the dedicated agent span.
12781283
// These follow the OpenTelemetry GenAI specification and enable out-of-the-box
12791284
// LLM dashboards in Grafana, Datadog, and Honeycomb without custom mappings.
1280-
// Token-usage attributes are inherited from the conclusion span attributes above.
1281-
const agentAttributes = [...attributes];
1285+
// Token-usage attributes are included here (and only here) to prevent
1286+
// double-counting with the conclusion span.
1287+
const agentAttributes = [...attributes, ...usageAttrs];
12821288
// gen_ai.operation.name is Required by the OTel GenAI spec for inference spans.
12831289
// All gh-aw agent executions are chat-style LLM completions.
12841290
agentAttributes.push(buildAttr("gen_ai.operation.name", "chat"));
1285-
if (model) agentAttributes.push(buildAttr("gen_ai.request.model", model));
1291+
// gen_ai.request.model is already present in agentAttributes via the spread above
1292+
// (added to attributes at the top of this function); do not push again.
12861293
// gen_ai.system is the OTel GenAI standard attribute for the LLM system/provider.
12871294
// Map the gh-aw internal engine ID to the standardized value so backends can apply
12881295
// native GenAI dashboard detection. The original engine ID is preserved in gh-aw.engine.
@@ -1317,6 +1324,12 @@ async function sendJobConclusionSpan(spanName, options = {}) {
13171324
}
13181325
}
13191326

1327+
// When no agent span was emitted, attach token-usage attributes to the conclusion span
1328+
// so a single query remains sufficient for observability (no join required).
1329+
if (!agentSpanEmitted) {
1330+
attributes.push(...usageAttrs);
1331+
}
1332+
13201333
const payload = buildOTLPPayload({
13211334
traceId,
13221335
spanId: conclusionSpanId,

actions/setup/js/send_otlp_span.test.cjs

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2362,6 +2362,36 @@ describe("sendJobConclusionSpan", () => {
23622362
expect(attrs["gen_ai.workflow.name"]).toBe("otel-advisor");
23632363
});
23642364

2365+
it("does not duplicate gen_ai.request.model on the agent span", async () => {
2366+
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
2367+
vi.stubGlobal("fetch", mockFetch);
2368+
2369+
process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url: "https://traces.example.com" }]);
2370+
process.env.INPUT_JOB_NAME = "agent";
2371+
2372+
const startMs = 1_700_000_000_000;
2373+
const endMs = 1_700_000_005_000;
2374+
const statSpy = vi.spyOn(fs, "statSync").mockReturnValue(/** @type {Partial<fs.Stats>} */ { mtimeMs: endMs });
2375+
const readFileSpy = vi.spyOn(fs, "readFileSync").mockImplementation(filePath => {
2376+
if (filePath === "/tmp/gh-aw/aw_info.json") {
2377+
return JSON.stringify({ model: "gpt-4o", engine_id: "codex" });
2378+
}
2379+
throw Object.assign(new Error("ENOENT"), { code: "ENOENT" });
2380+
});
2381+
2382+
await sendJobConclusionSpan("gh-aw.agent.conclusion", { startMs });
2383+
2384+
statSpy.mockRestore();
2385+
readFileSpy.mockRestore();
2386+
2387+
const agentBody = JSON.parse(mockFetch.mock.calls[0][1].body);
2388+
const agentSpan = agentBody.resourceSpans[0].scopeSpans[0].spans[0];
2389+
const modelKeys = agentSpan.attributes.filter(a => a.key === "gen_ai.request.model");
2390+
// gen_ai.request.model must appear exactly once — no duplicate from a second push
2391+
expect(modelKeys).toHaveLength(1);
2392+
expect(modelKeys[0].value.stringValue).toBe("gpt-4o");
2393+
});
2394+
23652395
it("omits gen_ai.request.model, gen_ai.system, gh-aw.engine and gen_ai.workflow.name from the agent span when model, engine_id and workflow_name are absent", async () => {
23662396
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
23672397
vi.stubGlobal("fetch", mockFetch);
@@ -3705,7 +3735,7 @@ describe("sendJobConclusionSpan", () => {
37053735
statSpy.mockRestore();
37063736
});
37073737

3708-
it("includes all four gen_ai token breakdown attributes on the conclusion span when agent_usage.json is present", async () => {
3738+
it("omits gen_ai token breakdown attributes from conclusion span when agent sub-span is emitted (token attrs go to agent span only)", async () => {
37093739
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
37103740
vi.stubGlobal("fetch", mockFetch);
37113741

@@ -3724,11 +3754,14 @@ describe("sendJobConclusionSpan", () => {
37243754
// mockFetch.mock.calls[0] is the agent span, [1] is the conclusion span
37253755
const conclusionBody = JSON.parse(mockFetch.mock.calls[1][1].body);
37263756
const conclusionSpan = conclusionBody.resourceSpans[0].scopeSpans[0].spans[0];
3727-
const attrs = Object.fromEntries(conclusionSpan.attributes.map(a => [a.key, a.value.intValue ?? a.value.stringValue]));
3728-
expect(attrs["gen_ai.usage.input_tokens"]).toBe(48200);
3729-
expect(attrs["gen_ai.usage.output_tokens"]).toBe(1350);
3730-
expect(attrs["gen_ai.usage.cache_read.input_tokens"]).toBe(41000);
3731-
expect(attrs["gen_ai.usage.cache_creation.input_tokens"]).toBe(3100);
3757+
const conclusionKeys = conclusionSpan.attributes.map(a => a.key);
3758+
// Token-usage attributes must not appear on the conclusion span when an agent
3759+
// sub-span is emitted; they live exclusively on the agent span to prevent
3760+
// double-counting in backends that sum gen_ai.usage.* across all spans.
3761+
expect(conclusionKeys).not.toContain("gen_ai.usage.input_tokens");
3762+
expect(conclusionKeys).not.toContain("gen_ai.usage.output_tokens");
3763+
expect(conclusionKeys).not.toContain("gen_ai.usage.cache_read.input_tokens");
3764+
expect(conclusionKeys).not.toContain("gen_ai.usage.cache_creation.input_tokens");
37323765
});
37333766

37343767
it("includes gen_ai token breakdown on conclusion span even when no agent sub-span is emitted", async () => {
@@ -3779,7 +3812,7 @@ describe("sendJobConclusionSpan", () => {
37793812
expect(keys).not.toContain("gen_ai.usage.cache_creation.input_tokens");
37803813
});
37813814

3782-
it("omits a gen_ai token attribute from the conclusion span when its value is zero", async () => {
3815+
it("omits all gen_ai token breakdown attributes from conclusion span regardless of zero-values when agent sub-span is emitted", async () => {
37833816
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
37843817
vi.stubGlobal("fetch", mockFetch);
37853818

@@ -3798,11 +3831,11 @@ describe("sendJobConclusionSpan", () => {
37983831
// mockFetch.mock.calls[0] is the agent span, [1] is the conclusion span
37993832
const conclusionBody = JSON.parse(mockFetch.mock.calls[1][1].body);
38003833
const conclusionSpan = conclusionBody.resourceSpans[0].scopeSpans[0].spans[0];
3801-
const attrs = Object.fromEntries(conclusionSpan.attributes.map(a => [a.key, a.value.intValue ?? a.value.stringValue]));
3802-
expect(attrs["gen_ai.usage.input_tokens"]).toBe(1000);
3803-
expect(attrs["gen_ai.usage.cache_read.input_tokens"]).toBe(500);
38043834
const keys = conclusionSpan.attributes.map(a => a.key);
3835+
// No token-usage attributes on conclusion span when an agent sub-span is emitted.
3836+
expect(keys).not.toContain("gen_ai.usage.input_tokens");
38053837
expect(keys).not.toContain("gen_ai.usage.output_tokens");
3838+
expect(keys).not.toContain("gen_ai.usage.cache_read.input_tokens");
38063839
expect(keys).not.toContain("gen_ai.usage.cache_creation.input_tokens");
38073840
});
38083841
});

0 commit comments

Comments
 (0)