Skip to content

Commit e40c804

Browse files
committed
feat: support customizing visual auto-rater
1 parent b8371cb commit e40c804

File tree

3 files changed

+23
-1
lines changed

3 files changed

+23
-1
lines changed

runner/configuration/environment-config.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ export const environmentConfigSchema = z.object({
7070
fullStackFramework: z.string().optional(),
7171
/** Path to the prompt to use when rating code. */
7272
codeRatingPrompt: z.string().optional(),
73+
/** Path to the prompt to use when rating screenshots. */
74+
visualRatingPrompt: z.string().optional(),
7375
/** When enabled, the system prompts for this environment won't be included in the report. */
7476
classifyPrompts: z.boolean().optional(),
7577
/**

runner/configuration/environment.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ export class Environment {
5050
readonly clientSideFramework: FrameworkInfo;
5151
/** Path from which to read the code rating prompt. */
5252
readonly codeRatingPromptPath: string | null;
53+
/** Path from which to read the visual rating prompt. */
54+
readonly visualRatingPromptPath: string | null;
5355
/** Whether the prompts should be removed from the final report. */
5456
readonly classifyPrompts: boolean;
5557
/** Whether this is one of the built-in environments that come with the runner. */
@@ -109,6 +111,9 @@ export class Environment {
109111
this.codeRatingPromptPath = config.codeRatingPrompt
110112
? join(rootPath, config.codeRatingPrompt)
111113
: null;
114+
this.visualRatingPromptPath = config.visualRatingPrompt
115+
? join(rootPath, config.visualRatingPrompt)
116+
: null;
112117
this.classifyPrompts = config.classifyPrompts ?? false;
113118
this.isBuiltIn = rootPath.includes('node_modules');
114119
this.executor = config.executor;

runner/ratings/autoraters/visuals-rater.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ import {Environment} from '../../configuration/environment.js';
1212
import {screenshotUrlToPngBuffer} from '../../utils/screenshots.js';
1313
import {Usage} from '../../shared-interfaces.js';
1414
import {AiSdkRunner} from '../../codegen/ai-sdk/ai-sdk-runner.js';
15+
import {readFileSync} from 'fs';
16+
17+
/** Cache for visual rating prompts that have been read from disk. */
18+
const CACHED_VISUAL_RATING_PROMPTS: Record<string, string> = {};
1519

1620
/**
1721
* Automatically rate the appearance of a screenshot using an LLM.
@@ -32,7 +36,18 @@ export async function autoRateAppearance(
3236
screenshotPngUrl: string,
3337
label: string,
3438
): Promise<AutoRateResult> {
35-
const prompt = environment.renderPrompt(defaultVisualRaterPrompt, null, {
39+
let promptText: string;
40+
if (environment.visualRatingPromptPath) {
41+
CACHED_VISUAL_RATING_PROMPTS[environment.visualRatingPromptPath] ??= readFileSync(
42+
environment.visualRatingPromptPath,
43+
'utf8',
44+
);
45+
promptText = CACHED_VISUAL_RATING_PROMPTS[environment.visualRatingPromptPath];
46+
} else {
47+
promptText = defaultVisualRaterPrompt;
48+
}
49+
50+
const prompt = environment.renderPrompt(promptText, environment.visualRatingPromptPath, {
3651
APP_PROMPT: appPrompt,
3752
}).result;
3853

0 commit comments

Comments
 (0)