Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions verifiers/envs/experimental/composable/composable_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ def get_sandbox_resources(self, state: State) -> dict[str, Any]:

async def build_env_vars(self, state: State) -> dict[str, str]:
env_vars = await super().build_env_vars(state)
# Harness env vars act as defaults — only fill keys the user
# (via ``CLIAgentEnv(environment_vars=...)``) did not set.
for k, v in self.harness.env_vars.items():
env_vars.setdefault(k, v)
info = state.get("info") or {}
task_env_vars = self.taskset.get_env_vars()
if task_env_vars:
Expand Down
10 changes: 9 additions & 1 deletion verifiers/envs/experimental/composable/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from __future__ import annotations

from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import TYPE_CHECKING

if TYPE_CHECKING:
Expand Down Expand Up @@ -75,6 +75,13 @@ class Harness:
metrics_keys:
Optional whitelist of metric keys to surface. ``None`` means
surface all keys found.
env_vars:
Harness-owned sandbox env vars forwarded to the agent at
runtime. Merged into the sandbox env by ``ComposableEnv`` with
the lowest precedence — values set via
``CLIAgentEnv(environment_vars=...)`` or by the task win.
Lets a harness declare agent defaults (e.g. the rlm harness
setting ``OPENAI_API_KEY=intercepted``) in one place.
"""

install_script: str | None = None
Expand All @@ -90,6 +97,7 @@ class Harness:
metrics_prefix: str = ""
metrics_key: str | None = None
metrics_keys: list[str] | None = None
env_vars: dict[str, str] = field(default_factory=dict)

def get_effective_upload_dir_mapping(self) -> dict[str, str] | None:
"""Return the merged upload mapping (skills_path + upload_dir_mapping)."""
Expand Down
17 changes: 17 additions & 0 deletions verifiers/envs/experimental/composable/harnesses/rlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,19 @@
DEFAULT_RLM_MAX_TURNS = 100
DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH = "/task/append_to_system_prompt.txt"

# Sandbox env vars owned by the rlm harness. The set is deliberately small:
#   * OPENAI_API_KEY=intercepted sends inference through the verifiers
#     interception tunnel -- this is sandbox plumbing, not an rlm knob.
#   * RLM_MAX_TURNS lifts rlm's dev default (30) to a value better suited
#     to longer sandbox-based rollouts.
# All remaining rlm-side knobs (RLM_MAX_TURNS_IN_CONTEXT, RLM_EXEC_TIMEOUT,
# RLM_ENABLED_TOOLS, ...) keep rlm's own defaults; callers override them
# through ``rlm_harness(env_vars={...})``.
DEFAULT_RLM_ENV_VARS: dict[str, str] = dict(
    OPENAI_API_KEY="intercepted",
    RLM_MAX_TURNS=str(DEFAULT_RLM_MAX_TURNS),
)


def build_install_script(
rlm_repo_url: str = DEFAULT_RLM_REPO_URL,
Expand Down Expand Up @@ -68,7 +81,10 @@ def rlm_harness(
rlm_repo_url: str = DEFAULT_RLM_REPO_URL,
rlm_branch: str = DEFAULT_RLM_BRANCH,
append_to_system_prompt: str | None = None,
env_vars: dict[str, str] | None = None,
) -> Harness:
"""Build the rlm harness. ``env_vars`` layers on top of
``DEFAULT_RLM_ENV_VARS``; user keys win."""
return Harness(
install_script=build_install_script(rlm_repo_url, rlm_branch),
run_command=build_run_command(instruction_path, workdir),
Expand All @@ -79,4 +95,5 @@ def rlm_harness(
metrics_path="{workdir}/.rlm/sessions/*/meta.json",
metrics_key="metrics",
metrics_prefix="rlm_",
env_vars={**DEFAULT_RLM_ENV_VARS, **(env_vars or {})},
)
Loading