Skip to content

Commit 7defc03

Browse files
authored
Fix push_to_pull_request_branch patch size to use incremental net diff (#28198)
1 parent 5ab2c9b commit 7defc03

9 files changed

Lines changed: 647 additions & 67 deletions

.changeset/patch-push-pr-incremental-size-check.md

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

actions/setup/js/generate_git_patch.cjs

Lines changed: 51 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ const path = require("path");
1212
const { getErrorMessage } = require("./error_helpers.cjs");
1313
const { execGitSync, getGitAuthEnv } = require("./git_helpers.cjs");
1414
const { ERR_SYSTEM } = require("./error_codes.cjs");
15+
const { sanitizeForFilename, sanitizeBranchNameForPatch, sanitizeRepoSlugForPatch, getPatchPath, getPatchPathForRepo, buildExcludePathspecs, computeIncrementalDiffSize } = require("./git_patch_utils.cjs");
16+
17+
// sanitizeForFilename is re-exported below for backward compatibility with
18+
// existing callers that imported it from this module.
19+
void sanitizeForFilename;
1520

1621
/**
1722
* Debug logging helper - logs to stderr when DEBUG env var matches
@@ -24,63 +29,6 @@ function debugLog(message) {
2429
}
2530
}
2631

27-
/**
28-
* Sanitize a string for use as a patch filename component.
29-
* Replaces path separators and special characters with dashes.
30-
* @param {string} value - The value to sanitize
31-
* @param {string} fallback - Fallback value when input is empty or nullish
32-
* @returns {string} The sanitized string safe for use in a filename
33-
*/
34-
function sanitizeForFilename(value, fallback) {
35-
if (!value) return fallback;
36-
return value
37-
.replace(/[/\\:*?"<>|]/g, "-")
38-
.replace(/-{2,}/g, "-")
39-
.replace(/^-|-$/g, "")
40-
.toLowerCase();
41-
}
42-
43-
/**
44-
* Sanitize a branch name for use as a patch filename
45-
* @param {string} branchName - The branch name to sanitize
46-
* @returns {string} The sanitized branch name safe for use in a filename
47-
*/
48-
function sanitizeBranchNameForPatch(branchName) {
49-
return sanitizeForFilename(branchName, "unknown");
50-
}
51-
52-
/**
53-
* Get the patch file path for a given branch name
54-
* @param {string} branchName - The branch name
55-
* @returns {string} The full patch file path
56-
*/
57-
function getPatchPath(branchName) {
58-
const sanitized = sanitizeBranchNameForPatch(branchName);
59-
return `/tmp/gh-aw/aw-${sanitized}.patch`;
60-
}
61-
62-
/**
63-
* Sanitize a repo slug for use in a filename
64-
* @param {string} repoSlug - The repo slug (owner/repo)
65-
* @returns {string} The sanitized slug safe for use in a filename
66-
*/
67-
function sanitizeRepoSlugForPatch(repoSlug) {
68-
return sanitizeForFilename(repoSlug, "");
69-
}
70-
71-
/**
72-
* Get the patch file path for a given branch name and repo slug
73-
* Used for multi-repo scenarios to prevent patch file collisions
74-
* @param {string} branchName - The branch name
75-
* @param {string} repoSlug - The repository slug (owner/repo)
76-
* @returns {string} The full patch file path including repo disambiguation
77-
*/
78-
function getPatchPathForRepo(branchName, repoSlug) {
79-
const sanitizedBranch = sanitizeBranchNameForPatch(branchName);
80-
const sanitizedRepo = sanitizeRepoSlugForPatch(repoSlug);
81-
return `/tmp/gh-aw/aw-${sanitizedRepo}-${sanitizedBranch}.patch`;
82-
}
83-
8432
/**
8533
* Generates a git patch file for the current changes
8634
* @param {string} branchName - The branch name to generate patch for
@@ -111,15 +59,14 @@ async function generateGitPatch(branchName, baseBranch, options = {}) {
11159
// These are appended after "--" so git treats them as pathspecs, not revisions.
11260
// Using git's native pathspec magic keeps the exclusions out of the patch entirely
11361
// without any post-processing of the generated patch file.
114-
const excludePathspecs = Array.isArray(options.excludedFiles) && options.excludedFiles.length > 0 ? options.excludedFiles.map(p => `:(exclude)${p}`) : [];
62+
const excludeArgsArr = buildExcludePathspecs(options.excludedFiles);
11563

11664
/**
11765
* Returns the arguments to append to a format-patch call when excludedFiles is set.
118-
* Produces ["--", ":(exclude)pattern1", ":(exclude)pattern2", ...] or [].
11966
* @returns {string[]}
12067
*/
12168
function excludeArgs() {
122-
return excludePathspecs.length > 0 ? ["--", ...excludePathspecs] : [];
69+
return excludeArgsArr;
12370
}
12471
const patchPath = options.repoSlug ? getPatchPathForRepo(branchName, options.repoSlug) : getPatchPath(branchName);
12572

@@ -294,6 +241,25 @@ async function generateGitPatch(branchName, baseBranch, options = {}) {
294241
patchLines: 0,
295242
};
296243
}
244+
245+
// In incremental mode, the patch must be measured relative to the existing
246+
// PR branch head (origin/<branch>), never relative to the default branch.
247+
// If Strategy 1 did not produce a patch (e.g. format-patch yielded empty
248+
// output for an unusual commit shape — excluded-files filtering away every
249+
// change, or binary-only commits with unusual encoding), do NOT fall
250+
// through to Strategy 2 or Strategy 3 — those use GITHUB_SHA..HEAD or
251+
// merge-base with a remote ref and would produce a checkout-base diff
252+
// (which can be many MB on a long-running branch). Returning an explicit
253+
// error preserves the "incremental" contract that the patch reflects only
254+
// the new commits.
255+
if (!patchGenerated && mode === "incremental") {
256+
debugLog(`Strategy 1 (incremental): format-patch produced no output for ${baseRef}..${branchName} despite ${commitCount} incremental commit(s), refusing to fall through to checkout-base strategies`);
257+
return {
258+
success: false,
259+
error: `Cannot generate incremental patch: git format-patch produced no output for ${baseRef}..${branchName} despite ${commitCount} incremental commit(s).`,
260+
patchPath: patchPath,
261+
};
262+
}
297263
} catch (branchError) {
298264
// Branch does not exist locally
299265
debugLog(`Strategy 1: Branch '${branchName}' does not exist locally - ${getErrorMessage(branchError)}`);
@@ -450,12 +416,36 @@ async function generateGitPatch(branchName, baseBranch, options = {}) {
450416
};
451417
}
452418

453-
debugLog(`Final: SUCCESS - patchSize=${patchSize} bytes, patchLines=${patchLines}, baseCommit=${baseCommitSha || "(unknown)"}`);
419+
// In incremental mode, also compute the net diff size between baseRef and the
420+
// branch tip. The format-patch file size (patchSize) is the sum of every
421+
// commit's individual diff plus per-commit metadata headers, which can be
422+
// significantly larger than the actual net change. Consumers (e.g.
423+
// push_to_pull_request_branch) should validate `max_patch_size` against the
424+
// incremental net diff so the limit reflects how much the branch will
425+
// actually change, not the cumulative size of the commit history.
426+
//
427+
// The measurement itself (stream to temp file via `git diff --output`, stat,
428+
// cleanup) is extracted into git_patch_utils.computeIncrementalDiffSize so
429+
// it is O(1) memory and independently unit-testable against a real repo.
430+
let diffSize = null;
431+
if (mode === "incremental" && baseCommitSha && branchName) {
432+
diffSize = computeIncrementalDiffSize({
433+
baseRef: baseCommitSha,
434+
headRef: branchName,
435+
cwd,
436+
tmpPath: `${patchPath}.diff.tmp`,
437+
excludedFiles: options.excludedFiles,
438+
});
439+
debugLog(`Final: diffSize=${diffSize ?? "(n/a)"} bytes (baseRef=${baseCommitSha}..${branchName})`);
440+
}
441+
442+
debugLog(`Final: SUCCESS - patchSize=${patchSize} bytes, patchLines=${patchLines}, diffSize=${diffSize ?? "(n/a)"} bytes, baseCommit=${baseCommitSha || "(unknown)"}`);
454443
return {
455444
success: true,
456445
patchPath: patchPath,
457446
patchSize: patchSize,
458447
patchLines: patchLines,
448+
diffSize: diffSize,
459449
baseCommit: baseCommitSha,
460450
};
461451
}

actions/setup/js/git_patch_integration.test.cjs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,52 @@ describe("git patch integration tests", () => {
619619
}
620620
});
621621

622+
it("should report diffSize as the net diff between origin/branch and HEAD in incremental mode", async () => {
623+
// Reproduces the long-running branch scenario from the issue:
624+
// - origin/<branch> already has accumulated history (e.g. many KB)
625+
// - the agent makes a small new commit on top
626+
// - the format-patch file size only reflects the *new* commit (because
627+
// baseRef = origin/<branch>), but the returned diffSize must also be
628+
// small and must NOT reflect the divergence from main.
629+
630+
// Create the long-running branch with a "large" accumulated payload.
631+
execGit(["checkout", "-b", "long-running-branch"], { cwd: workingRepo });
632+
const accumulated = "accumulated content line\n".repeat(2000); // ~50 KB
633+
fs.writeFileSync(path.join(workingRepo, "accumulated.txt"), accumulated);
634+
execGit(["add", "accumulated.txt"], { cwd: workingRepo });
635+
execGit(["commit", "-m", "Accumulated work from previous iterations"], { cwd: workingRepo });
636+
execGit(["push", "-u", "origin", "long-running-branch"], { cwd: workingRepo });
637+
638+
// Now the agent's "new iteration": a tiny incremental change.
639+
fs.writeFileSync(path.join(workingRepo, "tiny.txt"), "tiny change\n");
640+
execGit(["add", "tiny.txt"], { cwd: workingRepo });
641+
execGit(["commit", "-m", "Tiny new iteration"], { cwd: workingRepo });
642+
643+
const origWorkspace = process.env.GITHUB_WORKSPACE;
644+
const origDefaultBranch = process.env.DEFAULT_BRANCH;
645+
process.env.GITHUB_WORKSPACE = workingRepo;
646+
process.env.DEFAULT_BRANCH = "main";
647+
648+
try {
649+
const result = await generateGitPatch("long-running-branch", "main", { mode: "incremental" });
650+
651+
expect(result.success).toBe(true);
652+
expect(typeof result.diffSize).toBe("number");
653+
654+
// The incremental net diff is just the tiny.txt addition (well under 1 KB).
655+
expect(result.diffSize).toBeGreaterThan(0);
656+
expect(result.diffSize).toBeLessThan(1024);
657+
658+
// And the diffSize must NOT include the accumulated 50 KB payload that
659+
// already exists on origin/long-running-branch — that is the entire
660+
// point of the fix.
661+
expect(result.diffSize).toBeLessThan(2000);
662+
} finally {
663+
process.env.GITHUB_WORKSPACE = origWorkspace;
664+
process.env.DEFAULT_BRANCH = origDefaultBranch;
665+
}
666+
});
667+
622668
/**
623669
* Sets GITHUB_WORKSPACE, DEFAULT_BRANCH, GITHUB_TOKEN, and GITHUB_SERVER_URL for
624670
* a test, then restores the original values (or deletes them if they were unset).
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
// @ts-check
2+
/// <reference types="@actions/github-script" />
3+
4+
/**
5+
* Utilities shared by git patch generation and validation code.
6+
*
7+
* This module intentionally has no side effects and no coupling to the
8+
* patch-generation orchestration in generate_git_patch.cjs. Each helper is
9+
* pure/stateless or performs a well-defined local filesystem operation, which
10+
* keeps the surface small, easy to test against a real git repo, and reusable
11+
* by other safe-output handlers (e.g. bundle transport, create_pull_request
12+
* fallback paths).
13+
*/
14+
15+
const fs = require("fs");
16+
17+
const { getErrorMessage } = require("./error_helpers.cjs");
18+
const { execGitSync } = require("./git_helpers.cjs");
19+
20+
/**
21+
* Debug logging helper - logs to stderr when DEBUG env var matches
22+
* @param {string} message - Debug message to log
23+
*/
24+
function debugLog(message) {
25+
const debug = process.env.DEBUG || "";
26+
if (debug === "*" || debug.includes("generate_git_patch") || debug.includes("patch")) {
27+
console.error(`[git_patch_utils] ${message}`);
28+
}
29+
}
30+
31+
/**
32+
* Sanitize a string for use as a patch filename component.
33+
* Replaces path separators and special characters with dashes.
34+
* @param {string} value - The value to sanitize
35+
* @param {string} fallback - Fallback value when input is empty or nullish
36+
* @returns {string} The sanitized string safe for use in a filename
37+
*/
38+
function sanitizeForFilename(value, fallback) {
39+
if (!value) return fallback;
40+
return value
41+
.replace(/[/\\:*?"<>|]/g, "-")
42+
.replace(/-{2,}/g, "-")
43+
.replace(/^-|-$/g, "")
44+
.toLowerCase();
45+
}
46+
47+
/**
48+
* Sanitize a branch name for use as a patch filename
49+
* @param {string} branchName - The branch name to sanitize
50+
* @returns {string} The sanitized branch name safe for use in a filename
51+
*/
52+
function sanitizeBranchNameForPatch(branchName) {
53+
return sanitizeForFilename(branchName, "unknown");
54+
}
55+
56+
/**
57+
* Sanitize a repo slug for use in a filename
58+
* @param {string} repoSlug - The repo slug (owner/repo)
59+
* @returns {string} The sanitized slug safe for use in a filename
60+
*/
61+
function sanitizeRepoSlugForPatch(repoSlug) {
62+
return sanitizeForFilename(repoSlug, "");
63+
}
64+
65+
/**
66+
* Get the patch file path for a given branch name
67+
* @param {string} branchName - The branch name
68+
* @returns {string} The full patch file path
69+
*/
70+
function getPatchPath(branchName) {
71+
const sanitized = sanitizeBranchNameForPatch(branchName);
72+
return `/tmp/gh-aw/aw-${sanitized}.patch`;
73+
}
74+
75+
/**
76+
* Get the patch file path for a given branch name and repo slug
77+
* Used for multi-repo scenarios to prevent patch file collisions
78+
* @param {string} branchName - The branch name
79+
* @param {string} repoSlug - The repository slug (owner/repo)
80+
* @returns {string} The full patch file path including repo disambiguation
81+
*/
82+
function getPatchPathForRepo(branchName, repoSlug) {
83+
const sanitizedBranch = sanitizeBranchNameForPatch(branchName);
84+
const sanitizedRepo = sanitizeRepoSlugForPatch(repoSlug);
85+
return `/tmp/gh-aw/aw-${sanitizedRepo}-${sanitizedBranch}.patch`;
86+
}
87+
88+
/**
89+
* Builds the pathspec arguments to exclude specific files from a git command.
90+
* Produces ["--", ":(exclude)pattern1", ":(exclude)pattern2", ...] or [] when
91+
* the input is empty/unset. These are passed after a "--" so git treats them
92+
* as pathspecs, not revisions.
93+
*
94+
* @param {string[] | undefined | null} excludedFiles - Glob patterns to exclude
95+
* @returns {string[]} Arguments to append to a git format-patch or git diff call
96+
*/
97+
function buildExcludePathspecs(excludedFiles) {
98+
if (!Array.isArray(excludedFiles) || excludedFiles.length === 0) {
99+
return [];
100+
}
101+
return ["--", ...excludedFiles.map(p => `:(exclude)${p}`)];
102+
}
103+
104+
/**
105+
* Compute the net diff size in bytes between two refs in the given git repo.
106+
*
107+
* This is the value that should be compared against `max_patch_size` in
108+
* push_to_pull_request_branch: it reflects how much the PR branch will
109+
* actually change as a result of the push, independent of how the patch or
110+
* bundle transport encodes the commit history.
111+
*
112+
* Implementation note: we use `git diff --binary --output=<tmpfile>` rather
113+
* than buffering the diff through execGitSync's stdout. That keeps memory
114+
* usage O(1) regardless of the diff size (we just stat the file) and avoids
115+
* hitting the execGitSync maxBuffer on large binary diffs. The temp file is
116+
* removed in `finally` on success, failure, and stat failure alike.
117+
*
118+
* @param {Object} args - Arguments
119+
* @param {string} args.baseRef - Base ref (commit SHA, branch, or ref)
120+
* @param {string} args.headRef - Head ref (commit SHA, branch, or ref)
121+
* @param {string} args.cwd - Working directory containing the git repo
122+
* @param {string} args.tmpPath - Absolute path to the temp diff file (will be
123+
* written and removed by this function)
124+
* @param {string[]} [args.excludedFiles] - Glob patterns to exclude
125+
* @returns {number | null} The net diff size in bytes, or null on failure
126+
*/
127+
function computeIncrementalDiffSize({ baseRef, headRef, cwd, tmpPath, excludedFiles }) {
128+
if (!baseRef || !headRef || !cwd || !tmpPath) {
129+
return null;
130+
}
131+
const excludeArgs = buildExcludePathspecs(excludedFiles);
132+
let diffSize = null;
133+
try {
134+
execGitSync(["diff", "--binary", `--output=${tmpPath}`, `${baseRef}..${headRef}`, ...excludeArgs], { cwd });
135+
if (fs.existsSync(tmpPath)) {
136+
diffSize = fs.statSync(tmpPath).size;
137+
debugLog(`Computed incremental net diffSize=${diffSize} bytes (baseRef=${baseRef}..${headRef})`);
138+
}
139+
} catch (diffErr) {
140+
debugLog(`Failed to compute incremental net diffSize - ${getErrorMessage(diffErr)}`);
141+
} finally {
142+
// Best-effort cleanup of the temp diff file; we only needed its size.
143+
try {
144+
if (fs.existsSync(tmpPath)) {
145+
fs.unlinkSync(tmpPath);
146+
}
147+
} catch {
148+
// Cleanup failure is non-fatal.
149+
}
150+
}
151+
return diffSize;
152+
}
153+
154+
module.exports = {
155+
sanitizeForFilename,
156+
sanitizeBranchNameForPatch,
157+
sanitizeRepoSlugForPatch,
158+
getPatchPath,
159+
getPatchPathForRepo,
160+
buildExcludePathspecs,
161+
computeIncrementalDiffSize,
162+
};

0 commit comments

Comments
 (0)