Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 52 additions & 2 deletions backend/packages/harness/deerflow/sandbox/local/local_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ def __init__(self, id: str, path_mappings: list[PathMapping] | None = None):
"""
super().__init__(id)
self.path_mappings = path_mappings or []
# Track files written through write_file so read_file only
# reverse-resolves paths in agent-authored content.
self._agent_written_paths: set[str] = set()

def _is_read_only_path(self, resolved_path: str) -> bool:
"""Check if a resolved path is under a read-only mount.
Expand Down Expand Up @@ -205,6 +208,39 @@ def replace_match(match: re.Match) -> str:

return pattern.sub(replace_match, command)

def _resolve_paths_in_content(self, content: str) -> str:
    """Rewrite container paths embedded in file content as local paths.

    The content is handled as plain text: any span that matches a mapped
    container path prefix (optionally followed by a ``/``-separated tail)
    is handed to ``_resolve_path`` and replaced by the result. Backslashes
    in the resolved path are converted to forward slashes so that a
    Windows-style path (e.g. ``C:\\Users\\..``) does not introduce escape
    sequences into string literals of the written file.

    Args:
        content: File content that may contain container paths.

    Returns:
        The content with every matched container path replaced by its
        resolved local path, written with forward slashes. Returned
        unchanged when no path mappings are configured.
    """
    import re

    # Longest container prefixes are placed first in the alternation so
    # they are tried before shorter prefixes.
    mappings_longest_first = sorted(
        self.path_mappings,
        key=lambda m: len(m.container_path),
        reverse=True,
    )
    if not mappings_longest_first:
        return content

    # After the prefix there must be "/", the end of the text, or a
    # character that cannot continue a path segment. An optional "/..."
    # tail is then consumed up to whitespace, a quote, or one of
    # ; & | < > ( ).
    patterns = []
    for mapping in mappings_longest_first:
        patterns.append(re.escape(mapping.container_path) + r"(?=/|$|[^\w./-])(?:/[^\s\"';&|<>()]*)?")
    matcher = re.compile("|".join(f"({p})" for p in patterns))

    def to_local(found: re.Match) -> str:
        # Resolve the whole matched span, then switch to forward slashes
        # so backslashes never end up inside the written content.
        local_path = self._resolve_path(found.group(0))
        return local_path.replace("\\", "/")

    return matcher.sub(to_local, content)

@staticmethod
def _get_shell() -> str:
"""Detect available shell executable with fallback."""
Expand Down Expand Up @@ -280,7 +316,14 @@ def read_file(self, path: str) -> str:
resolved_path = self._resolve_path(path)
try:
with open(resolved_path, encoding="utf-8") as f:
return f.read()
content = f.read()
# Only reverse-resolve paths in files that were previously written
# by write_file (agent-authored content). User-uploaded files,
# external tool output, and other non-agent content should not be
# silently rewritten — see discussion on PR #1935.
if resolved_path in self._agent_written_paths:
content = self._reverse_resolve_paths_in_output(content)
return content
except OSError as e:
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
raise type(e)(e.errno, e.strerror, path) from None
Expand All @@ -293,9 +336,16 @@ def write_file(self, path: str, content: str, append: bool = False) -> None:
dir_path = os.path.dirname(resolved_path)
if dir_path:
os.makedirs(dir_path, exist_ok=True)
# Resolve container paths in content to local paths
# using the content-specific resolver (forward-slash safe)
resolved_content = self._resolve_paths_in_content(content)
mode = "a" if append else "w"
Comment on lines +339 to 342
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_resolve_paths_in_command() is documented and tuned for shell command parsing (boundary chars, quoting, etc.), but it’s now being reused to transform arbitrary file contents. That coupling is easy to miss and makes future tweaks to command parsing potentially break file IO semantics. Consider extracting a dedicated helper for “resolve paths in text content” (or renaming/generalizing the helper) so the intent and constraints are explicit.

Copilot uses AI. Check for mistakes.
with open(resolved_path, mode, encoding="utf-8") as f:
f.write(content)
f.write(resolved_content)
Comment on lines +339 to +344
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Windows hosts, self._resolve_path() will typically return paths with backslashes (because LocalSandboxProvider stores local_path as Path(...).resolve()), so resolved_content can inject sequences like C:\Users\... into source files. In common cases (e.g. Python scripts), those backslashes inside string literals can create invalid escape sequences (e.g. \U...) and break execution. Consider normalizing resolved paths inserted into file content to a safer representation (e.g. forward slashes) and ensure read_file’s reverse-resolver also recognizes that normalized form so agents don’t see leaked host paths.

Copilot uses AI. Check for mistakes.
# Track this path so read_file knows to reverse-resolve on read.
# Only agent-written files get reverse-resolved; user uploads and
# external tool output are left untouched.
self._agent_written_paths.add(resolved_path)
except OSError as e:
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
raise type(e)(e.errno, e.strerror, path) from None
Expand Down
92 changes: 92 additions & 0 deletions backend/tests/test_local_sandbox_provider_mounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,98 @@ def test_setup_path_mappings_skips_non_absolute_container_path(self, tmp_path):

assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"]

def test_write_file_resolves_container_paths_in_content(self, tmp_path):
    """write_file should replace container paths in file content with local paths."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    sandbox = LocalSandbox(
        "test",
        [
            PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
        ],
    )
    sandbox.write_file(
        "/mnt/data/script.py",
        'import pathlib\npath = "/mnt/data/output"\nprint(path)',
    )
    # The file now embeds the local temp path; decode it explicitly as
    # UTF-8 so the check does not depend on the host's locale encoding
    # when that path contains non-ASCII characters.
    written = (data_dir / "script.py").read_text(encoding="utf-8")
    # Container path should be resolved to local path (forward slashes)
    assert str(data_dir).replace("\\", "/") in written
    assert "/mnt/data/output" not in written

def test_write_file_uses_forward_slashes_on_windows_paths(self, tmp_path):
    """Resolved paths in content should always use forward slashes."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    sandbox = LocalSandbox(
        "test",
        [
            PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
        ],
    )
    sandbox.write_file(
        "/mnt/data/config.py",
        'DATA_DIR = "/mnt/data/files"',
    )
    # Decode explicitly as UTF-8 rather than with the locale default, so a
    # temp path with non-ASCII characters cannot break the read-back.
    written = (data_dir / "config.py").read_text(encoding="utf-8")
    # Must not contain backslashes that could break escape sequences.
    # Only the value on the DATA_DIR line is inspected.
    assert "\\" not in written.split("DATA_DIR = ")[1].split("\n")[0]

def test_read_file_reverse_resolves_local_paths_in_agent_written_files(self, tmp_path):
    """Agent-written files should be read back with container paths, not local ones."""
    mount_dir = tmp_path / "data"
    mount_dir.mkdir()

    mapping = PathMapping(container_path="/mnt/data", local_path=str(mount_dir))
    sandbox = LocalSandbox("test", [mapping])

    # The file must be created via write_file: that is what marks it as
    # agent-written and therefore eligible for reverse resolution on read.
    sandbox.write_file("/mnt/data/info.txt", "File located at: /mnt/data/info.txt")

    assert "/mnt/data/info.txt" in sandbox.read_file("/mnt/data/info.txt")

def test_read_file_does_not_reverse_resolve_non_agent_files(self, tmp_path):
    """read_file should NOT rewrite paths in user-uploaded or external files."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    sandbox = LocalSandbox(
        "test",
        [
            PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
        ],
    )
    # Write directly to filesystem (simulates user upload or external tool output).
    # Encode explicitly as UTF-8 so the fixture does not depend on the
    # host's locale encoding when the temp path contains non-ASCII
    # characters.
    local_path = str(data_dir).replace("\\", "/")
    (data_dir / "config.yml").write_text(f"output_dir: {local_path}/outputs", encoding="utf-8")

    content = sandbox.read_file("/mnt/data/config.yml")
    # Content should be returned as-is, NOT reverse-resolved
    assert local_path in content

def test_write_then_read_roundtrip(self, tmp_path):
    """A container path written via write_file is still present after read_file."""
    mount_dir = tmp_path / "data"
    mount_dir.mkdir()

    mapping = PathMapping(container_path="/mnt/data", local_path=str(mount_dir))
    sandbox = LocalSandbox("test", [mapping])

    original = 'cfg = {"path": "/mnt/data/config.json", "flag": true}'
    sandbox.write_file("/mnt/data/settings.py", original)

    # Reading the same file back must show the container path again.
    roundtripped = sandbox.read_file("/mnt/data/settings.py")
    assert "/mnt/data/config.json" in roundtripped

def test_setup_path_mappings_normalizes_container_path_trailing_slash(self, tmp_path):
skills_dir = tmp_path / "skills"
skills_dir.mkdir()
Expand Down