Skip to content

Commit fd3630c

Browse files
authored
fix: enforce 48h data retention on AIC usage cache entries (#39084)
1 parent f5260c4 commit fd3630c

4 files changed

Lines changed: 228 additions & 19 deletions

actions/setup/js/check_daily_aic_workflow_guardrail.cjs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ const { createRateLimitAwareGithub, fetchAndLogRateLimit } = require("./github_r
1313

1414
const PRIMARY_GUARDRAIL_ARTIFACT_NAMES = ["usage"];
1515
const DAILY_WORKFLOW_WINDOW_MS = 24 * 60 * 60 * 1000;
16+
/** Cache entries older than this threshold (in ms) are skipped when loading. */
17+
const CACHE_RETENTION_MS = 48 * 60 * 60 * 1000;
1618
const MAX_WORKFLOW_RUN_PAGES = 10;
1719
const RATE_LIMIT_RESERVE = 100;
1820
const REQUEST_OVERHEAD_BUDGET = MAX_WORKFLOW_RUN_PAGES + 4;
@@ -94,7 +96,11 @@ function shouldSkipDailyAICGuardrail() {
9496

9597
/**
9698
* Loads the per-workflow usage cache from the JSONL file restored by the activation job's
97-
* cache-restore step. Each line is a JSON object `{ run_id: number, aic: number }`.
99+
* cache-restore step. Each line is a JSON object `{ run_id: number, aic: number, timestamp?: string }`.
100+
*
101+
* Entries with a `timestamp` older than {@link CACHE_RETENTION_MS} (48 h) are skipped so that
102+
* stale data cannot inflate the daily-AIC total. Entries without a `timestamp` (written by an
103+
* older version of the write script), or with a `timestamp` that is not a parseable date string, are kept for backward compatibility.
98104
*
99105
* Returns a `Map<runId, aic>` so that callers can check whether a prior run's AIC is already
100106
* known without downloading the run's artifact from the GitHub API.
@@ -112,14 +118,25 @@ function loadAICUsageCache(filePath) {
112118
return cache;
113119
}
114120
const content = fs.readFileSync(cachePath, "utf8");
121+
const now = Date.now();
122+
const cutoff = now - CACHE_RETENTION_MS;
115123
let loaded = 0;
124+
let skippedStale = 0;
116125
for (const rawLine of content.split("\n")) {
117126
const line = rawLine.trim();
118127
if (!line || !line.startsWith("{")) {
119128
continue;
120129
}
121130
try {
122131
const entry = JSON.parse(line);
132+
// Skip entries that have a timestamp and are older than the retention window.
133+
if (typeof entry?.timestamp === "string") {
134+
const ts = Date.parse(entry.timestamp);
135+
if (Number.isFinite(ts) && ts < cutoff) {
136+
skippedStale++;
137+
continue;
138+
}
139+
}
123140
const runId = Number(entry?.run_id);
124141
const rawAic = entry?.aic;
125142
const aic = typeof rawAic === "number" ? rawAic : NaN;
@@ -131,7 +148,7 @@ function loadAICUsageCache(filePath) {
131148
// Ignore malformed lines.
132149
}
133150
}
134-
logDailyGuardrail("Loaded usage cache", { path: cachePath, entriesLoaded: loaded });
151+
logDailyGuardrail("Loaded usage cache", { path: cachePath, entriesLoaded: loaded, skippedStale });
135152
} catch (err) {
136153
logDailyGuardrail("Failed to load usage cache; proceeding without it", {
137154
path: cachePath,

actions/setup/js/check_daily_aic_workflow_guardrail.test.cjs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,5 +612,30 @@ describe("check_daily_aic_workflow_guardrail", () => {
612612
expect(cache.has(603)).toBe(false);
613613
expect(cache.get(604)).toBe(4.2);
614614
});
615+
616+
it("loads entries that have a recent timestamp (within 48 h)", () => {
617+
const recentTimestamp = new Date(Date.now() - 60 * 60 * 1000).toISOString(); // 1 hour ago
618+
fs.writeFileSync(cacheFile, JSON.stringify({ run_id: 701, aic: 8.0, timestamp: recentTimestamp }) + "\n", "utf8");
619+
const cache = exports.loadAICUsageCache(cacheFile);
620+
expect(cache.has(701)).toBe(true);
621+
expect(cache.get(701)).toBe(8.0);
622+
});
623+
624+
it("skips entries whose timestamp is older than 48 h", () => {
625+
const staleTimestamp = new Date(Date.now() - 49 * 60 * 60 * 1000).toISOString(); // 49 hours ago
626+
const recentTimestamp = new Date(Date.now() - 30 * 60 * 1000).toISOString(); // 30 minutes ago
627+
fs.writeFileSync(cacheFile, [JSON.stringify({ run_id: 801, aic: 3.0, timestamp: staleTimestamp }), JSON.stringify({ run_id: 802, aic: 5.0, timestamp: recentTimestamp })].join("\n") + "\n", "utf8");
628+
const cache = exports.loadAICUsageCache(cacheFile);
629+
expect(cache.has(801)).toBe(false);
630+
expect(cache.has(802)).toBe(true);
631+
expect(cache.get(802)).toBe(5.0);
632+
});
633+
634+
it("keeps entries without a timestamp (backward compatibility)", () => {
635+
fs.writeFileSync(cacheFile, JSON.stringify({ run_id: 901, aic: 2.5 }) + "\n", "utf8");
636+
const cache = exports.loadAICUsageCache(cacheFile);
637+
expect(cache.has(901)).toBe(true);
638+
expect(cache.get(901)).toBe(2.5);
639+
});
615640
});
616641
});

actions/setup/js/write_daily_aic_usage_cache.cjs

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ const { getErrorMessage } = require("./error_helpers.cjs");
2020
/** Path where the restored (and updated) usage cache lives on the runner. */
2121
const CACHE_FILE_PATH = "/tmp/gh-aw/agentic-workflow-usage-cache.jsonl";
2222

23+
/** Entries older than this threshold (in ms) are pruned when rewriting the cache. */
24+
const CACHE_RETENTION_MS = 48 * 60 * 60 * 1000;
25+
2326
/**
2427
* Directory prepared by the "Collect usage artifact files" step in the conclusion job.
2528
* Contains agent_usage.jsonl and agent/token_usage.jsonl which mirror the contents of
@@ -47,12 +50,18 @@ function logCache(message, details) {
4750
}
4851

4952
/**
50-
* Appends a `{run_id, aic}` JSONL entry to the cache file, preserving any existing entries
51-
* that were restored from the previous cache snapshot.
53+
* Appends a `{run_id, aic, timestamp}` JSONL entry to the cache file, preserving any existing
54+
* entries that were restored from the previous cache snapshot and are within the 48-hour
55+
* retention window. Entries older than {@link CACHE_RETENTION_MS} are pruned to keep the
56+
* cache file bounded.
5257
*
58+
* @param {string} [cacheFilePath] Override the cache file path (defaults to {@link CACHE_FILE_PATH}; useful in tests).
59+
* @param {string} [usageDir] Override the usage directory (defaults to {@link USAGE_DIR}; useful in tests).
5360
* @returns {Promise<void>}
5461
*/
55-
async function main() {
62+
async function mainWithPaths(cacheFilePath, usageDir) {
63+
const cachePath = cacheFilePath || CACHE_FILE_PATH;
64+
const usageDirPath = usageDir || USAGE_DIR;
5665
try {
5766
const runId = Number(process.env.GITHUB_RUN_ID || 0);
5867
if (!runId) {
@@ -61,8 +70,8 @@ async function main() {
6170
}
6271

6372
// Compute AIC from the usage JSONL files prepared by buildUsageArtifactUploadSteps.
64-
const usageFiles = findJSONLFiles(USAGE_DIR);
65-
logCache("Scanning usage JSONL files", { dir: USAGE_DIR, count: usageFiles.length, files: usageFiles });
73+
const usageFiles = findJSONLFiles(usageDirPath);
74+
logCache("Scanning usage JSONL files", { dir: usageDirPath, count: usageFiles.length, files: usageFiles });
6675
const aic = sumAICFromUsageJSONLFiles(usageFiles);
6776
logCache("Computed AIC for current run", { runId, aic });
6877

@@ -76,32 +85,67 @@ async function main() {
7685
}
7786

7887
// Read existing cache content (restored from the previous run's cache snapshot, if any).
79-
let existingLines = "";
88+
// Entries with a `timestamp` older than CACHE_RETENTION_MS are pruned to keep the file
89+
// bounded. Entries without a `timestamp` (written by an older version of this script)
90+
// are preserved for backward compatibility.
91+
/** @type {string[]} */
92+
let keptLines = [];
8093
try {
81-
if (fs.existsSync(CACHE_FILE_PATH)) {
82-
existingLines = fs.readFileSync(CACHE_FILE_PATH, "utf8").trimEnd();
83-
const lineCount = existingLines ? existingLines.split("\n").length : 0;
84-
logCache("Loaded existing cache entries", { path: CACHE_FILE_PATH, lineCount });
94+
if (fs.existsSync(cachePath)) {
95+
const raw = fs.readFileSync(cachePath, "utf8").trimEnd();
96+
const now = Date.now();
97+
const cutoff = now - CACHE_RETENTION_MS;
98+
let total = 0;
99+
let pruned = 0;
100+
for (const rawLine of raw.split("\n")) {
101+
const line = rawLine.trim();
102+
if (!line) continue;
103+
total++;
104+
try {
105+
const entry = JSON.parse(line);
106+
if (typeof entry?.timestamp === "string") {
107+
const ts = Date.parse(entry.timestamp);
108+
if (Number.isFinite(ts) && ts < cutoff) {
109+
pruned++;
110+
continue;
111+
}
112+
}
113+
keptLines.push(line);
114+
} catch {
115+
// Preserve lines that cannot be parsed (defensive: avoids data loss).
116+
keptLines.push(line);
117+
}
118+
}
119+
logCache("Loaded existing cache entries", { path: cachePath, total, kept: keptLines.length, pruned });
85120
} else {
86-
logCache("No existing cache file found; starting fresh", { path: CACHE_FILE_PATH });
121+
logCache("No existing cache file found; starting fresh", { path: cachePath });
87122
}
88123
} catch (readErr) {
89124
core.warning(`[daily-aic-cache] Could not read existing cache file: ${getErrorMessage(readErr)}`);
90125
}
91126

92127
// Build the updated JSONL content.
93-
const newEntry = JSON.stringify({ run_id: runId, aic });
94-
const updatedContent = existingLines ? `${existingLines}\n${newEntry}\n` : `${newEntry}\n`;
128+
const newEntry = JSON.stringify({ run_id: runId, aic, timestamp: new Date().toISOString() });
129+
const updatedContent = keptLines.length > 0 ? `${keptLines.join("\n")}\n${newEntry}\n` : `${newEntry}\n`;
95130

96131
// Ensure the directory exists and write the updated file.
97-
const dir = path.dirname(CACHE_FILE_PATH);
132+
const dir = path.dirname(cachePath);
98133
fs.mkdirSync(dir, { recursive: true });
99-
fs.writeFileSync(CACHE_FILE_PATH, updatedContent, "utf8");
100-
logCache("Wrote cache entry", { runId, aic, path: CACHE_FILE_PATH });
134+
fs.writeFileSync(cachePath, updatedContent, "utf8");
135+
logCache("Wrote cache entry", { runId, aic, path: cachePath });
101136
} catch (error) {
102137
// Non-fatal: a cache write failure should never block the conclusion job.
103138
core.warning(`[daily-aic-cache] Failed to write usage cache: ${getErrorMessage(error)}`);
104139
}
105140
}
106141

107-
module.exports = { main };
142+
/**
143+
* Entry point called from the GitHub Actions step.
144+
*
145+
* @returns {Promise<void>}
146+
*/
147+
async function main() {
148+
return mainWithPaths();
149+
}
150+
151+
module.exports = { main, mainWithPaths };
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// @ts-check
2+
import fs from "fs";
3+
import os from "os";
4+
import path from "path";
5+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
6+
7+
let exports;
8+
9+
describe("write_daily_aic_usage_cache", () => {
10+
let tmpDir;
11+
let cacheFile;
12+
let usageDir;
13+
14+
beforeEach(async () => {
15+
vi.resetModules();
16+
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "write-aic-cache-test-"));
17+
cacheFile = path.join(tmpDir, "agentic-workflow-usage-cache.jsonl");
18+
usageDir = path.join(tmpDir, "usage");
19+
fs.mkdirSync(usageDir, { recursive: true });
20+
21+
global.core = { info: vi.fn(), warning: vi.fn(), error: vi.fn(), setFailed: vi.fn() };
22+
process.env.GITHUB_RUN_ID = "12345";
23+
24+
const mod = await import("./write_daily_aic_usage_cache.cjs");
25+
exports = mod.default || mod;
26+
});
27+
28+
afterEach(() => {
29+
fs.rmSync(tmpDir, { recursive: true, force: true });
30+
delete global.core;
31+
delete process.env.GITHUB_RUN_ID;
32+
});
33+
34+
/**
35+
* Writes a usage JSONL file in the temp usage directory so that sumAICFromUsageJSONLFiles
36+
* can pick it up. The entry uses the `ai_credits` field for simplicity.
37+
*
38+
* @param {number} aiCredits
39+
*/
40+
function writeUsageFile(aiCredits) {
41+
fs.writeFileSync(path.join(usageDir, "agent_usage.jsonl"), JSON.stringify({ ai_credits: aiCredits }) + "\n", "utf8");
42+
}
43+
44+
/**
45+
* Runs the cache writer by calling mainWithPaths() with the temp-directory cache file and
46+
* usage directory. We call the function directly on the in-process module instance.
47+
*
48+
* @returns {Promise<void>}
49+
*/
50+
async function runMain() {
51+
// Call the exported mainWithPaths() helper, which accepts explicit path arguments, so
52+
// the cache file and usage directory resolve inside the per-test temp directory rather
53+
// than the module-level defaults. No constants are patched and no env vars are swapped.
54+
await exports.mainWithPaths(cacheFile, usageDir);
55+
}
56+
57+
it("writes a new entry with run_id, aic, and a timestamp when no cache file exists", async () => {
58+
writeUsageFile(7.5);
59+
await runMain();
60+
61+
const content = fs.readFileSync(cacheFile, "utf8").trim();
62+
const entry = JSON.parse(content);
63+
expect(entry.run_id).toBe(12345);
64+
expect(entry.aic).toBe(7.5);
65+
expect(typeof entry.timestamp).toBe("string");
66+
// Timestamp should be a valid ISO 8601 date within the last minute.
67+
const ts = Date.parse(entry.timestamp);
68+
expect(Number.isFinite(ts)).toBe(true);
69+
expect(ts).toBeLessThanOrEqual(Date.now());
70+
expect(ts).toBeGreaterThan(Date.now() - 60_000);
71+
});
72+
73+
it("appends to an existing cache file and preserves entries within 48 h", async () => {
74+
const recentTs = new Date(Date.now() - 2 * 60 * 60 * 1000).toISOString(); // 2 hours ago
75+
fs.writeFileSync(cacheFile, JSON.stringify({ run_id: 9999, aic: 3.0, timestamp: recentTs }) + "\n", "utf8");
76+
77+
writeUsageFile(5.0);
78+
await runMain();
79+
80+
const lines = fs.readFileSync(cacheFile, "utf8").trim().split("\n");
81+
expect(lines).toHaveLength(2);
82+
const first = JSON.parse(lines[0]);
83+
expect(first.run_id).toBe(9999);
84+
const second = JSON.parse(lines[1]);
85+
expect(second.run_id).toBe(12345);
86+
expect(second.aic).toBe(5.0);
87+
});
88+
89+
it("prunes existing entries whose timestamp is older than 48 h", async () => {
90+
const staleTs = new Date(Date.now() - 50 * 60 * 60 * 1000).toISOString(); // 50 hours ago
91+
const recentTs = new Date(Date.now() - 1 * 60 * 60 * 1000).toISOString(); // 1 hour ago
92+
fs.writeFileSync(cacheFile, [JSON.stringify({ run_id: 1001, aic: 1.0, timestamp: staleTs }), JSON.stringify({ run_id: 1002, aic: 2.0, timestamp: recentTs })].join("\n") + "\n", "utf8");
93+
94+
writeUsageFile(4.0);
95+
await runMain();
96+
97+
const lines = fs.readFileSync(cacheFile, "utf8").trim().split("\n");
98+
const runIds = lines.map(line => JSON.parse(line).run_id);
99+
// Stale entry 1001 must be pruned; recent 1002 and new 12345 kept.
100+
expect(runIds).not.toContain(1001);
101+
expect(runIds).toContain(1002);
102+
expect(runIds).toContain(12345);
103+
});
104+
105+
it("preserves entries without a timestamp (backward compatibility)", async () => {
106+
fs.writeFileSync(cacheFile, JSON.stringify({ run_id: 2001, aic: 8.0 }) + "\n", "utf8");
107+
108+
writeUsageFile(1.0);
109+
await runMain();
110+
111+
const lines = fs.readFileSync(cacheFile, "utf8").trim().split("\n");
112+
const runIds = lines.map(line => JSON.parse(line).run_id);
113+
expect(runIds).toContain(2001);
114+
expect(runIds).toContain(12345);
115+
});
116+
117+
it("skips writing when GITHUB_RUN_ID is not set", async () => {
118+
delete process.env.GITHUB_RUN_ID;
119+
writeUsageFile(5.0);
120+
await runMain();
121+
expect(fs.existsSync(cacheFile)).toBe(false);
122+
});
123+
});

0 commit comments

Comments
 (0)