Skip to content

Commit 08b7f76

Browse files
authored
fix(inference): scope accuracy notices to the specific affected runs (#467)
1 parent 3f1b944 commit 08b7f76

4 files changed

Lines changed: 184 additions & 65 deletions

File tree

packages/app/src/components/inference/ui/GPUGraph.tsx

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ import {
5353
measureLegendRightInset,
5454
renderKnownIssueAnnotations,
5555
} from '@/components/inference/utils/knownIssueAnnotations';
56-
import { matchKnownConfigIssues } from '@/lib/known-issues';
56+
import { matchKnownConfigIssues, pointMatchesIssue } from '@/lib/known-issues';
5757

5858
const CHART_MARGIN = { top: 24, right: 10, bottom: 60, left: 60 };
5959

@@ -275,11 +275,7 @@ const GPUGraph = React.memo(
275275
label: cfg ? getDisplayLabel(cfg) : issue.hwKey,
276276
color: getCssColor(colorEntry?.color ?? resolveColor(issue.hwKey)),
277277
points: filteredData
278-
.filter(
279-
(p) =>
280-
String(p.hwKey) === issue.hwKey &&
281-
(!issue.precisions || issue.precisions.includes(p.precision)),
282-
)
278+
.filter((p) => pointMatchesIssue(issue, p))
283279
.map((p) => ({ x: p.x, y: p.y })),
284280
};
285281
}),

packages/app/src/components/inference/ui/ScatterGraph.tsx

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ import { useUnofficialRun } from '@/components/unofficial-run-provider';
1212
import { computeToggle } from '@/hooks/useTogglableSet';
1313
import { getHardwareConfig, getModelSortIndex } from '@/lib/constants';
1414
import { getChartWatermark, getPrecisionLabel, type Precision } from '@/lib/data-mappings';
15-
import { matchKnownConfigIssues } from '@/lib/known-issues';
15+
import { matchKnownConfigIssues, pointMatchesIssue } from '@/lib/known-issues';
1616
import { formatNumber, getDisplayLabel, updateRepoUrl } from '@/lib/utils';
1717
import { D3Chart } from '@/lib/d3-chart/D3Chart';
1818
import type {
@@ -369,11 +369,7 @@ const ScatterGraph = React.memo(
369369
label: parseHwKeyToLabel(issue.hwKey).label,
370370
color: getCssColor(resolveColor(issue.hwKey)),
371371
points: visiblePoints
372-
.filter(
373-
(p) =>
374-
String(p.hwKey) === issue.hwKey &&
375-
(!issue.precisions || issue.precisions.includes(p.precision)),
376-
)
372+
.filter((p) => pointMatchesIssue(issue, p))
377373
.map((p) => ({ x: p.x, y: p.y })),
378374
}));
379375
}, [

packages/app/src/lib/known-issues.test.ts

Lines changed: 149 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,64 +1,170 @@
11
import { describe, expect, it } from 'vitest';
22

3-
import { KNOWN_CONFIG_ISSUES, knownIssueCsvNote, matchKnownConfigIssues } from './known-issues';
3+
import {
4+
type KnownConfigIssue,
5+
KNOWN_CONFIG_ISSUES,
6+
knownIssueCsvNote,
7+
matchKnownConfigIssues,
8+
pointMatchesIssue,
9+
runIdFromRunUrl,
10+
} from './known-issues';
411

512
const DSR1 = 'DeepSeek-R1-0528';
613

7-
describe('matchKnownConfigIssues', () => {
8-
it('matches the GB300 Dynamo TRT MTP entry for DeepSeek R1 FP8', () => {
9-
const issues = matchKnownConfigIssues(DSR1, [
10-
{ hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8' },
11-
]);
12-
expect(issues).toHaveLength(1);
13-
expect(issues[0].url).toBe('https://github.com/NVIDIA/srt-slurm/issues/51');
14+
const GB300_AFFECTED_RUN = '21785935852';
15+
const GB300_AFFECTED_URL = `https://github.com/SemiAnalysisAI/InferenceX/actions/runs/${GB300_AFFECTED_RUN}/attempts/2`;
16+
const GB300_AFFECTED_URL_FEB5 =
17+
'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/21726915223/attempts/1';
18+
const GB300_FIXED_URL =
19+
'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/99999999999/attempts/1';
20+
21+
const MI355X_AFFECTED_URL_MAR13 =
22+
'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/23052579053';
23+
const MI355X_AFFECTED_URL_MAY7 =
24+
'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/25471873049';
25+
const MI355X_AFFECTED_URL_MAY31 =
26+
'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/26714221123';
27+
const MI355X_UNFLAGGED_URL =
28+
'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/26491418772';
29+
30+
const gb300Issue = KNOWN_CONFIG_ISSUES.find((i) => i.hwKey === 'gb300_dynamo-trt_mtp')!;
31+
const mi355xIssue = KNOWN_CONFIG_ISSUES.find((i) => i.hwKey === 'mi355x_mori-sglang_mtp')!;
32+
33+
describe('runIdFromRunUrl', () => {
34+
it('extracts the run id, ignoring the /attempts suffix and host', () => {
35+
expect(runIdFromRunUrl(GB300_AFFECTED_URL)).toBe(GB300_AFFECTED_RUN);
36+
expect(runIdFromRunUrl('https://example.test/x/runs/777/attempts/3')).toBe('777');
1437
});
1538

16-
it('does not match GB300 Dynamo TRT MTP for non-FP8 precisions', () => {
17-
const issues = matchKnownConfigIssues(DSR1, [
18-
{ hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp4' },
19-
]);
20-
expect(issues).toHaveLength(0);
39+
it('returns null for missing or unparseable URLs', () => {
40+
expect(runIdFromRunUrl(undefined)).toBeNull();
41+
expect(runIdFromRunUrl(null)).toBeNull();
42+
expect(runIdFromRunUrl('https://github.com/o/r/actions')).toBeNull();
2143
});
44+
});
2245

23-
it('matches the MI355X MoRI SGLang MTP entry for DeepSeek R1 FP4', () => {
24-
const issues = matchKnownConfigIssues(DSR1, [
25-
{ hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp4' },
26-
]);
27-
expect(issues).toHaveLength(1);
28-
expect(issues[0].url).toBe('https://github.com/sgl-project/sglang/issues/27194');
46+
// pointMatchesIssue holds the real matching logic; matchKnownConfigIssues just
47+
// wraps it in a model filter + dedup, so the behavior matrix lives here.
48+
describe('pointMatchesIssue', () => {
49+
it('matches each affected run of a run-scoped issue and nothing else', () => {
50+
for (const run_url of [GB300_AFFECTED_URL, GB300_AFFECTED_URL_FEB5]) {
51+
expect(
52+
pointMatchesIssue(gb300Issue, { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8', run_url }),
53+
).toBe(true);
54+
}
55+
expect(
56+
pointMatchesIssue(gb300Issue, {
57+
hwKey: 'gb300_dynamo-trt_mtp',
58+
precision: 'fp8',
59+
run_url: GB300_FIXED_URL,
60+
}),
61+
).toBe(false);
62+
63+
for (const run_url of [
64+
MI355X_AFFECTED_URL_MAR13,
65+
MI355X_AFFECTED_URL_MAY7,
66+
MI355X_AFFECTED_URL_MAY31,
67+
]) {
68+
expect(
69+
pointMatchesIssue(mi355xIssue, {
70+
hwKey: 'mi355x_mori-sglang_mtp',
71+
precision: 'fp4',
72+
run_url,
73+
}),
74+
).toBe(true);
75+
}
76+
expect(
77+
pointMatchesIssue(mi355xIssue, {
78+
hwKey: 'mi355x_mori-sglang_mtp',
79+
precision: 'fp4',
80+
run_url: MI355X_UNFLAGGED_URL,
81+
}),
82+
).toBe(false);
2983
});
3084

31-
it('does not match MI355X MoRI SGLang MTP for non-FP4 precisions', () => {
32-
const issues = matchKnownConfigIssues(DSR1, [
33-
{ hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp8' },
34-
]);
35-
expect(issues).toHaveLength(0);
85+
it('ignores the /attempts/<n> suffix when matching the run id', () => {
86+
const reattempt = `https://github.com/SemiAnalysisAI/InferenceX/actions/runs/${GB300_AFFECTED_RUN}/attempts/1`;
87+
expect(
88+
pointMatchesIssue(gb300Issue, {
89+
hwKey: 'gb300_dynamo-trt_mtp',
90+
precision: 'fp8',
91+
run_url: reattempt,
92+
}),
93+
).toBe(true);
3694
});
3795

38-
it('does not match other models', () => {
39-
const issues = matchKnownConfigIssues('DeepSeek-V4-Pro', [
40-
{ hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp4' },
41-
{ hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp4' },
42-
]);
43-
expect(issues).toHaveLength(0);
96+
it('does not match a run-scoped issue when the point has no run_url', () => {
97+
expect(pointMatchesIssue(gb300Issue, { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8' })).toBe(
98+
false,
99+
);
100+
});
101+
102+
it('does not match on a wrong precision or hwKey', () => {
103+
expect(
104+
pointMatchesIssue(gb300Issue, {
105+
hwKey: 'gb300_dynamo-trt_mtp',
106+
precision: 'fp4',
107+
run_url: GB300_AFFECTED_URL,
108+
}),
109+
).toBe(false);
110+
expect(
111+
pointMatchesIssue(gb300Issue, {
112+
hwKey: 'b200_trt_mtp',
113+
precision: 'fp8',
114+
run_url: GB300_AFFECTED_URL,
115+
}),
116+
).toBe(false);
117+
});
118+
119+
it('matches an issue with no affectedRuns on any point regardless of run', () => {
120+
const unscoped: KnownConfigIssue = {
121+
hwKey: 'foo_bar_mtp',
122+
model: gb300Issue.model,
123+
precisions: ['fp8'],
124+
summary: 'Accuracy issues',
125+
filed: 'Jan 1, 2026',
126+
url: 'https://example.test/issue',
127+
issueRef: 'example/repo#1',
128+
};
129+
expect(pointMatchesIssue(unscoped, { hwKey: 'foo_bar_mtp', precision: 'fp8' })).toBe(true);
130+
expect(
131+
pointMatchesIssue(unscoped, {
132+
hwKey: 'foo_bar_mtp',
133+
precision: 'fp8',
134+
run_url: GB300_FIXED_URL,
135+
}),
136+
).toBe(true);
44137
});
138+
});
45139

46-
it('does not match unaffected configs (non-MTP, other hardware)', () => {
47-
const issues = matchKnownConfigIssues(DSR1, [
48-
{ hwKey: 'gb300_dynamo-trt', precision: 'fp4' },
49-
{ hwKey: 'mi355x_sglang', precision: 'fp4' },
50-
{ hwKey: 'b200_trt_mtp', precision: 'fp4' },
140+
describe('matchKnownConfigIssues', () => {
141+
it('resolves a matching point to its issue, per config', () => {
142+
const gb = matchKnownConfigIssues(DSR1, [
143+
{ hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8', run_url: GB300_AFFECTED_URL },
51144
]);
52-
expect(issues).toHaveLength(0);
145+
expect(gb).toHaveLength(1);
146+
expect(gb[0].issueRef).toBe('NVIDIA/srt-slurm#51');
147+
148+
const amd = matchKnownConfigIssues(DSR1, [
149+
{ hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp4', run_url: MI355X_AFFECTED_URL_MAY7 },
150+
]);
151+
expect(amd).toHaveLength(1);
152+
expect(amd[0].issueRef).toBe('sgl-project/sglang#27194');
53153
});
54154

55-
it('returns each issue at most once even with many matching points', () => {
56-
const issues = matchKnownConfigIssues(DSR1, [
57-
{ hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8' },
58-
{ hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8' },
59-
{ hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp4' },
155+
it('filters by model and returns each issue at most once', () => {
156+
expect(
157+
matchKnownConfigIssues('DeepSeek-V4-Pro', [
158+
{ hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8', run_url: GB300_AFFECTED_URL },
159+
]),
160+
).toHaveLength(0);
161+
162+
const both = matchKnownConfigIssues(DSR1, [
163+
{ hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8', run_url: GB300_AFFECTED_URL },
164+
{ hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8', run_url: GB300_AFFECTED_URL },
165+
{ hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp4', run_url: MI355X_AFFECTED_URL_MAY7 },
60166
]);
61-
expect(issues).toHaveLength(2);
167+
expect(both).toHaveLength(2);
62168
});
63169

64170
it('returns nothing for an empty point list', () => {
@@ -68,7 +174,7 @@ describe('matchKnownConfigIssues', () => {
68174

69175
describe('knownIssueCsvNote', () => {
70176
it('includes the config label, filing date, issue ref, and URL', () => {
71-
const note = knownIssueCsvNote(KNOWN_CONFIG_ISSUES[0], 'GB300 NVL72 (Dynamo TRT, MTP)');
177+
const note = knownIssueCsvNote(gb300Issue, 'GB300 NVL72 (Dynamo TRT, MTP)');
72178
expect(note).toContain('WARNING: GB300 NVL72 (Dynamo TRT, MTP)');
73179
expect(note).toContain('filed since Apr 21, 2026');
74180
expect(note).toContain('NVIDIA/srt-slurm#51');

packages/app/src/lib/known-issues.ts

Lines changed: 31 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,8 @@ export interface KnownConfigIssue {
1515
model: Model;
1616
/** Precisions the issue applies to; omit to match every precision */
1717
precisions?: string[];
18+
/** GitHub Actions run IDs (the numeric `/runs/<id>` segment) the issue applies to; omit to match every run */
19+
affectedRuns?: string[];
1820
/** Short description shown in the warning box, e.g. "Accuracy issues" */
1921
summary: string;
2022
/** Human-readable filing date, e.g. "Apr 21, 2026" */
@@ -30,6 +32,7 @@ export const KNOWN_CONFIG_ISSUES: KnownConfigIssue[] = [
3032
hwKey: 'gb300_dynamo-trt_mtp',
3133
model: Model.DeepSeek_R1,
3234
precisions: ['fp8'],
35+
affectedRuns: ['21726915223', '21785935852'],
3336
summary: 'Accuracy issues',
3437
filed: 'Apr 21, 2026',
3538
url: 'https://github.com/NVIDIA/srt-slurm/issues/51',
@@ -39,6 +42,7 @@ export const KNOWN_CONFIG_ISSUES: KnownConfigIssue[] = [
3942
hwKey: 'mi355x_mori-sglang_mtp',
4043
model: Model.DeepSeek_R1,
4144
precisions: ['fp4'],
45+
affectedRuns: ['23052579053', '25471873049', '26714221123'],
4246
summary: 'Accuracy issues',
4347
filed: 'Jun 4, 2026',
4448
url: 'https://github.com/sgl-project/sglang/issues/27194',
@@ -50,25 +54,42 @@ export const KNOWN_CONFIG_ISSUES: KnownConfigIssue[] = [
5054
export interface MatchablePoint {
5155
hwKey: string | number;
5256
precision: string;
57+
/** Run URL of the GitHub Actions run that produced this point, if known. */
58+
run_url?: string;
59+
}
60+
61+
/** Numeric GitHub Actions run id from a run URL (ignores any `/attempts/<n>` suffix), or null. */
62+
export function runIdFromRunUrl(runUrl: string | null | undefined): string | null {
63+
return runUrl?.match(/\/runs\/(?<runId>\d+)/u)?.groups?.runId ?? null;
64+
}
65+
66+
/**
67+
* Whether a chart point falls under a known issue (hwKey + precision + run scope).
68+
* Shared by matchKnownConfigIssues and the on-chart warning-arrow point filters so
69+
* both agree on which points an issue covers. Run-scoped issues never match a point
70+
* whose run id is unknown.
71+
*/
72+
export function pointMatchesIssue(issue: KnownConfigIssue, p: MatchablePoint): boolean {
73+
if (String(p.hwKey) !== issue.hwKey) return false;
74+
if (issue.precisions && !issue.precisions.includes(p.precision)) return false;
75+
if (issue.affectedRuns) {
76+
const runId = runIdFromRunUrl(p.run_url);
77+
if (runId === null || !issue.affectedRuns.includes(runId)) return false;
78+
}
79+
return true;
5380
}
5481

5582
/**
56-
* Return the known issues whose (model, hwKey, precision) matches at least one
57-
* visible chart point. Order follows KNOWN_CONFIG_ISSUES; each issue appears at
58-
* most once.
83+
* Return the known issues whose (model, hwKey, precision, run) matches at least
84+
* one visible chart point. Order follows KNOWN_CONFIG_ISSUES; each issue appears
85+
* at most once.
5986
*/
6087
export function matchKnownConfigIssues(
6188
model: string,
6289
points: MatchablePoint[],
6390
): KnownConfigIssue[] {
6491
return KNOWN_CONFIG_ISSUES.filter(
65-
(issue) =>
66-
issue.model === model &&
67-
points.some(
68-
(p) =>
69-
String(p.hwKey) === issue.hwKey &&
70-
(!issue.precisions || issue.precisions.includes(p.precision)),
71-
),
92+
(issue) => issue.model === model && points.some((p) => pointMatchesIssue(issue, p)),
7293
);
7394
}
7495

0 commit comments

Comments
 (0)