-
Notifications
You must be signed in to change notification settings - Fork 7.7k
fix(sandbox): resolve paths in read_file/write_file content for LocalSandbox #1935
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ad04ddb
19c3f0d
dd4c7d8
beb4e20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,6 +62,9 @@ def __init__(self, id: str, path_mappings: list[PathMapping] | None = None): | |
| """ | ||
| super().__init__(id) | ||
| self.path_mappings = path_mappings or [] | ||
| # Track files written through write_file so read_file only | ||
| # reverse-resolves paths in agent-authored content. | ||
| self._agent_written_paths: set[str] = set() | ||
|
|
||
| def _is_read_only_path(self, resolved_path: str) -> bool: | ||
| """Check if a resolved path is under a read-only mount. | ||
|
|
@@ -205,6 +208,39 @@ def replace_match(match: re.Match) -> str: | |
|
|
||
| return pattern.sub(replace_match, command) | ||
|
|
||
| def _resolve_paths_in_content(self, content: str) -> str: | ||
| """Resolve container paths to local paths in arbitrary file content. | ||
|
|
||
| Unlike ``_resolve_paths_in_command`` which uses shell-aware boundary | ||
| characters, this method treats the content as plain text and resolves | ||
| every occurrence of a container path prefix. Resolved paths are | ||
| normalized to forward slashes to avoid backslash-escape issues on | ||
| Windows hosts (e.g. ``C:\\Users\\..`` breaking Python string literals). | ||
|
|
||
| Args: | ||
| content: File content that may contain container paths. | ||
|
|
||
| Returns: | ||
| Content with container paths resolved to local paths (forward slashes). | ||
| """ | ||
| import re | ||
|
|
||
| sorted_mappings = sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True) | ||
| if not sorted_mappings: | ||
| return content | ||
|
|
||
| patterns = [re.escape(m.container_path) + r"(?=/|$|[^\w./-])(?:/[^\s\"';&|<>()]*)?" for m in sorted_mappings] | ||
| pattern = re.compile("|".join(f"({p})" for p in patterns)) | ||
|
|
||
| def replace_match(match: re.Match) -> str: | ||
| matched_path = match.group(0) | ||
| resolved = self._resolve_path(matched_path) | ||
| # Normalize to forward slashes so that Windows backslash paths | ||
| # don't create invalid escape sequences in source files. | ||
| return resolved.replace("\\", "/") | ||
|
|
||
| return pattern.sub(replace_match, content) | ||
|
|
||
| @staticmethod | ||
| def _get_shell() -> str: | ||
| """Detect available shell executable with fallback.""" | ||
|
|
@@ -280,7 +316,14 @@ def read_file(self, path: str) -> str: | |
| resolved_path = self._resolve_path(path) | ||
| try: | ||
| with open(resolved_path, encoding="utf-8") as f: | ||
| return f.read() | ||
| content = f.read() | ||
| # Only reverse-resolve paths in files that were previously written | ||
| # by write_file (agent-authored content). User-uploaded files, | ||
| # external tool output, and other non-agent content should not be | ||
| # silently rewritten — see discussion on PR #1935. | ||
| if resolved_path in self._agent_written_paths: | ||
| content = self._reverse_resolve_paths_in_output(content) | ||
| return content | ||
| except OSError as e: | ||
| # Re-raise with the original path for clearer error messages, hiding internal resolved paths | ||
| raise type(e)(e.errno, e.strerror, path) from None | ||
|
|
@@ -293,9 +336,16 @@ def write_file(self, path: str, content: str, append: bool = False) -> None: | |
| dir_path = os.path.dirname(resolved_path) | ||
| if dir_path: | ||
| os.makedirs(dir_path, exist_ok=True) | ||
| # Resolve container paths in content to local paths | ||
| # using the content-specific resolver (forward-slash safe) | ||
| resolved_content = self._resolve_paths_in_content(content) | ||
| mode = "a" if append else "w" | ||
| with open(resolved_path, mode, encoding="utf-8") as f: | ||
| f.write(content) | ||
| f.write(resolved_content) | ||
|
Comment on lines +339 to +344
|
||
| # Track this path so read_file knows to reverse-resolve on read. | ||
| # Only agent-written files get reverse-resolved; user uploads and | ||
| # external tool output are left untouched. | ||
| self._agent_written_paths.add(resolved_path) | ||
| except OSError as e: | ||
| # Re-raise with the original path for clearer error messages, hiding internal resolved paths | ||
| raise type(e)(e.errno, e.strerror, path) from None | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_resolve_paths_in_command() is documented and tuned for shell command parsing (boundary chars, quoting, etc.), but it’s now being reused to transform arbitrary file contents. That coupling is easy to miss and makes future tweaks to command parsing potentially break file IO semantics. Consider extracting a dedicated helper for “resolve paths in text content” (or renaming/generalizing the helper) so the intent and constraints are explicit.