Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions miot-harness/src/miot_harness/observability/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,9 @@ def on_llm_end(
"cache_read_input_tokens": usage.cache_read_input_tokens,
"cache_creation_input_tokens": usage.cache_creation_input_tokens,
}
if cost_usd is not None:
data["cost_usd"] = cost_usd
# NOTE: cost_usd is deliberately NOT emitted to the client. The dollar
# cost stays server-side (span attribute above / eval + provenance logs)
# so we can track our own spend without exposing it to the tenant.
# Fail open: the progress sink is an observability concern,
# so a buggy or saturated event bus must NOT bubble up and
# tear down the LLM call. Catch Exception (not BaseException)
Expand Down
5 changes: 3 additions & 2 deletions miot-harness/tests/observability/test_usage_recorded_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def test_callback_emits_usage_recorded_when_progress_wired(
usage_events = [e for e in events if e.type == "usage.recorded"]
assert len(usage_events) == 1
payload = usage_events[0].data
# The dollar cost is NEVER shared with the client, even for a priced
# model — it stays server-side (span attribute + eval/provenance logs).
# Total input (200) minus cache_read (5) minus cache_creation (3) = 192
assert payload == {
"agent": "filter_expert",
Expand All @@ -64,9 +66,8 @@ def test_callback_emits_usage_recorded_when_progress_wired(
"output_tokens": 80,
"cache_read_input_tokens": 5,
"cache_creation_input_tokens": 3,
"cost_usd": payload["cost_usd"],
}
assert payload["cost_usd"] > 0
assert "cost_usd" not in payload
assert usage_events[0].run_id == "run_abc"


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ function usage(partial: Partial<UsageTotals> = {}): UsageTotals {
outputTokens: 0,
cacheReadTokens: 0,
cacheCreationTokens: 0,
costUsd: 0,
lastAgent: null,
lastCostUsd: null,
...partial,
};
}
Expand Down Expand Up @@ -44,23 +42,22 @@ describe("<FooterLine />", () => {
expect(lastFrame() ?? "").toContain("profile staging");
});

it("shows a usage segment when usageTotals has any tokens", () => {
it("shows a token usage segment when usageTotals has any tokens", () => {
const { lastFrame } = render(
<FooterLine
{...props({
usageTotals: usage({
inputTokens: 1234,
outputTokens: 56,
costUsd: 0.0123,
lastAgent: "synthesizer",
lastCostUsd: 0.0123,
}),
})}
/>,
);
const frame = lastFrame() ?? "";
expect(frame).toContain("1234→56");
expect(frame).toContain("$0.0123");
// The dollar cost is never shown to the client.
expect(frame).not.toContain("$");
});

it("hides the usage segment when there are no tokens yet", () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ function emptySlice(): TranscriptSlice {
outputTokens: 0,
cacheReadTokens: 0,
cacheCreationTokens: 0,
costUsd: 0,
lastAgent: null,
lastCostUsd: null,
},
};
}
Expand Down Expand Up @@ -434,7 +432,7 @@ describe("transcript projector — thinking + usage (plan: SSE rich events)", ()
expect(s2.transcript[0]).toMatchObject({ kind: "thinking", status: "complete" });
});

it("usage.recorded accumulates totals across calls", () => {
it("usage.recorded accumulates token totals across calls", () => {
const ctx = mkCtx();
const s1 = applyHarnessEvent(
emptySlice(),
Expand All @@ -446,7 +444,6 @@ describe("transcript projector — thinking + usage (plan: SSE rich events)", ()
output_tokens: 100,
cache_read_input_tokens: 50,
cache_creation_input_tokens: 25,
cost_usd: 0.0042,
},
}),
"r1",
Expand All @@ -457,9 +454,7 @@ describe("transcript projector — thinking + usage (plan: SSE rich events)", ()
outputTokens: 100,
cacheReadTokens: 50,
cacheCreationTokens: 25,
costUsd: 0.0042,
lastAgent: "filter_expert",
lastCostUsd: 0.0042,
});

const s2 = applyHarnessEvent(
Expand All @@ -480,8 +475,9 @@ describe("transcript projector — thinking + usage (plan: SSE rich events)", ()
expect(s2.usageTotals.inputTokens).toBe(3000);
expect(s2.usageTotals.outputTokens).toBe(300);
expect(s2.usageTotals.lastAgent).toBe("synthesizer");
expect(s2.usageTotals.lastCostUsd).toBeNull();
expect(s2.usageTotals.costUsd).toBeCloseTo(0.0042);
// Dollar cost is never carried on the client-side usage totals.
expect(s2.usageTotals).not.toHaveProperty("costUsd");
Comment thread
odtorres marked this conversation as resolved.
expect(s2.usageTotals).not.toHaveProperty("lastCostUsd");
});

it("agent.completed is intentionally dropped from the transcript", () => {
Expand Down
3 changes: 1 addition & 2 deletions turbo-repo/packages/miot-chat/src/tui/chrome/FooterLine.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ export function FooterLine(props: FooterLineProps): React.ReactElement {
segments.push(`ctx≈${props.approxTokens}tok (${props.contextPercent}%)`);
const u = props.usageTotals;
if (u && (u.inputTokens > 0 || u.outputTokens > 0)) {
const cost = u.costUsd > 0 ? ` $${u.costUsd.toFixed(4)}` : "";
segments.push(`${u.inputTokens}→${u.outputTokens}${cost}`);
segments.push(`${u.inputTokens}→${u.outputTokens}`);
}
segments.push(props.baseUrl);
if (props.profileName) {
Expand Down
8 changes: 2 additions & 6 deletions turbo-repo/packages/miot-chat/src/tui/session/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,27 +43,23 @@ export type TranscriptItem =

/**
* Running totals across the conversation. Reset by CLEAR /
* RESET_CONVERSATION; accumulated turn-over-turn. `lastCostUsd` is
* the cost of the most recent LLM call (chip footer uses it).
* RESET_CONVERSATION; accumulated turn-over-turn. Dollar cost is
* intentionally not tracked here — it is not shared with the client.
*/
export interface UsageTotals {
inputTokens: number;
outputTokens: number;
cacheReadTokens: number;
cacheCreationTokens: number;
costUsd: number;
lastAgent: string | null;
lastCostUsd: number | null;
}

export const ZERO_USAGE: UsageTotals = {
inputTokens: 0,
outputTokens: 0,
cacheReadTokens: 0,
cacheCreationTokens: 0,
costUsd: 0,
lastAgent: null,
lastCostUsd: null,
};

export interface PendingApproval {
Expand Down
5 changes: 0 additions & 5 deletions turbo-repo/packages/miot-chat/src/tui/transcript/project.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,6 @@ export function applyHarnessEvent(
typeof event.data.cache_creation_input_tokens === "number"
? event.data.cache_creation_input_tokens
: 0;
const cost =
typeof event.data.cost_usd === "number" ? event.data.cost_usd : 0;
const agent =
typeof event.data.agent === "string" ? event.data.agent : null;
return {
Expand All @@ -204,10 +202,7 @@ export function applyHarnessEvent(
outputTokens: slice.usageTotals.outputTokens + outT,
cacheReadTokens: slice.usageTotals.cacheReadTokens + cacheR,
cacheCreationTokens: slice.usageTotals.cacheCreationTokens + cacheC,
costUsd: slice.usageTotals.costUsd + cost,
lastAgent: agent,
lastCostUsd:
typeof event.data.cost_usd === "number" ? event.data.cost_usd : null,
},
};
}
Expand Down
1 change: 0 additions & 1 deletion turbo-repo/packages/miot-harness-client/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ export interface UsageRecordedData {
output_tokens: number;
cache_read_input_tokens: number;
cache_creation_input_tokens: number;
cost_usd?: number;
}

export const TERMINAL_EVENT_TYPES = new Set<HarnessEventType>([
Expand Down
Loading