Skip to content

Commit d5c2b9d

Browse files
authored
Python: Improves the robustness of filename handling (#13643)
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> Improves the robustness of filename handling in the Bedrock agent integration. Ensures that filenames sourced from external responses are properly sanitized before use in file operations, and adds safer defaults for file write behavior. ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> 1. Sanitize filenames received from Bedrock agent responses. 2. Add overwrite parameter to BinaryContent.write_to_file (defaults to False). 3. Add tests. 4. Update samples. ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone 😄
1 parent 02da6c4 commit d5c2b9d

File tree

19 files changed

+4533
-4295
lines changed

19 files changed

+4533
-4295
lines changed

python/samples/concepts/agents/bedrock_agent/bedrock_agent_with_code_interpreter.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

33
import asyncio
4-
import os
4+
from pathlib import Path
55

66
from semantic_kernel.agents import BedrockAgent, BedrockAgentThread
77
from semantic_kernel.contents.binary_content import BinaryContent
@@ -57,8 +57,16 @@ async def main():
5757
if not binary_item:
5858
raise RuntimeError("No chart generated")
5959

60-
file_path = os.path.join(os.path.dirname(__file__), binary_item.metadata["name"])
61-
binary_item.write_to_file(os.path.join(os.path.dirname(__file__), binary_item.metadata["name"]))
60+
# Securely assemble the file path and validate it's within the expected directory
61+
# This is a defense-in-depth measure against directory traversal attacks
62+
output_dir = Path(__file__).parent.resolve()
63+
file_path = (output_dir / binary_item.metadata["name"]).resolve()
64+
65+
# Verify the resolved path is within the expected directory
66+
if not file_path.is_relative_to(output_dir):
67+
raise RuntimeError("Invalid filename: would write outside the expected directory")
68+
69+
binary_item.write_to_file(file_path)
6270
print(f"Chart saved to {file_path}")
6371

6472
# Sample output (using anthropic.claude-3-haiku-20240307-v1:0):

python/samples/concepts/agents/bedrock_agent/bedrock_agent_with_code_interpreter_streaming.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

33
import asyncio
4-
import os
4+
from pathlib import Path
55

66
from semantic_kernel.agents import BedrockAgent, BedrockAgentThread
77
from semantic_kernel.contents.binary_content import BinaryContent
@@ -59,8 +59,16 @@ async def main():
5959
if not binary_item:
6060
raise RuntimeError("No chart generated")
6161

62-
file_path = os.path.join(os.path.dirname(__file__), binary_item.metadata["name"])
63-
binary_item.write_to_file(os.path.join(os.path.dirname(__file__), binary_item.metadata["name"]))
62+
# Securely assemble the file path and validate it's within the expected directory
63+
# This is a defense-in-depth measure against directory traversal attacks
64+
output_dir = Path(__file__).parent.resolve()
65+
file_path = (output_dir / binary_item.metadata["name"]).resolve()
66+
67+
# Verify the resolved path is within the expected directory
68+
if not file_path.is_relative_to(output_dir):
69+
raise RuntimeError("Invalid filename: would write outside the expected directory")
70+
71+
binary_item.write_to_file(file_path)
6472
print(f"Chart saved to {file_path}")
6573

6674
# Sample output (using anthropic.claude-3-haiku-20240307-v1:0):

python/semantic_kernel/agents/bedrock/bedrock_agent.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import asyncio
55
import logging
6+
import os
67
import sys
78
from collections.abc import AsyncIterable, Awaitable, Callable
89
from functools import partial, reduce
@@ -590,7 +591,7 @@ def _handle_files_event(self, event: dict[str, Any]) -> list[BinaryContent]:
590591
data=file["bytes"],
591592
data_format="base64",
592593
mime_type=file["type"],
593-
metadata={"name": file["name"]},
594+
metadata={"name": self._sanitize_filename(file["name"])},
594595
)
595596
for file in files_event["files"]
596597
]
@@ -639,7 +640,7 @@ def _handle_streaming_files_event(self, event: dict[str, Any]) -> StreamingChatM
639640
data=file["bytes"],
640641
data_format="base64",
641642
mime_type=file["type"],
642-
metadata={"name": file["name"]},
643+
metadata={"name": self._sanitize_filename(file["name"])},
643644
)
644645
for file in files_event["files"]
645646
]
@@ -720,3 +721,25 @@ async def _notify_thread_of_new_message(self, thread, new_message):
720721
The new message is passed to the agent when invoking the agent.
721722
"""
722723
pass
724+
725+
@staticmethod
726+
def _sanitize_filename(filename: str) -> str:
727+
"""Sanitize filename to prevent directory traversal attacks.
728+
729+
Args:
730+
filename: The filename to sanitize.
731+
732+
Returns:
733+
The sanitized filename with directory components removed.
734+
"""
735+
# Extract basename to remove any directory traversal attempts
736+
# Handle both Unix and Windows path separators
737+
sanitized = os.path.basename(filename.replace("\\", "/"))
738+
# Remove any remaining path separators or null bytes
739+
result = sanitized.replace("/", "").replace("\\", "").replace("\x00", "")
740+
if result != filename:
741+
logger.warning(
742+
f"Filename contained potentially malicious path components and was sanitized: "
743+
f"'{filename}' -> '{result}'"
744+
)
745+
return result

python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_tracing.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,14 @@ def __init__(self, diagnostics_settings: ModelDiagnosticSettings | None = None)
2222
Args:
2323
diagnostics_settings (ModelDiagnosticSettings, optional): Model diagnostics settings. Defaults to None.
2424
"""
25-
settings.tracing_implementation = "opentelemetry"
2625
super().__init__(diagnostics_settings=diagnostics_settings or ModelDiagnosticSettings())
26+
# Only set tracing implementation when diagnostics is enabled to avoid
27+
# interfering with method mocking in tests
28+
if (
29+
self.diagnostics_settings.enable_otel_diagnostics
30+
or self.diagnostics_settings.enable_otel_diagnostics_sensitive
31+
):
32+
settings.tracing_implementation = "opentelemetry"
2733

2834
def __enter__(self) -> None:
2935
"""Enable tracing.

python/semantic_kernel/contents/binary_content.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,21 @@ def from_element(cls: type[_T], element: Element) -> _T:
236236

237237
return cls(uri=element.get("uri", None))
238238

239-
def write_to_file(self, path: str | FilePath) -> None:
240-
"""Write the data to a file."""
239+
def write_to_file(self, path: str | FilePath, *, overwrite: bool = False) -> None:
240+
"""Write the data to a file.
241+
242+
Args:
243+
path: The path to write the file to.
244+
overwrite: If True, overwrite existing files. If False, raise an error if file exists.
245+
Defaults to False.
246+
247+
Raises:
248+
FileExistsError: If overwrite is False and the file already exists.
249+
"""
250+
file_path = Path(path)
251+
if not overwrite and file_path.exists():
252+
raise FileExistsError(f"File already exists and overwrite is disabled: {path}")
253+
241254
if self._data_uri and self._data_uri.data_array is not None:
242255
self._data_uri.data_array.tofile(path)
243256
return

python/tests/unit/agents/bedrock_agent/test_bedrock_agent.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,3 +671,81 @@ async def test_bedrock_agent_invoke_stream_with_function_call(
671671

672672

673673
# endregion
674+
675+
676+
# region Filename Sanitization Tests
677+
678+
679+
def test_sanitize_filename_simple():
680+
"""Test _sanitize_filename with a simple filename."""
681+
assert BedrockAgent._sanitize_filename("file.txt") == "file.txt"
682+
683+
684+
def test_sanitize_filename_with_spaces():
685+
"""Test _sanitize_filename with spaces in filename."""
686+
assert BedrockAgent._sanitize_filename("my file.txt") == "my file.txt"
687+
688+
689+
def test_sanitize_filename_directory_traversal_unix():
690+
"""Test _sanitize_filename strips Unix-style directory traversal."""
691+
assert BedrockAgent._sanitize_filename("../../../etc/passwd") == "passwd"
692+
assert BedrockAgent._sanitize_filename("../../file.txt") == "file.txt"
693+
assert BedrockAgent._sanitize_filename("/etc/passwd") == "passwd"
694+
695+
696+
def test_sanitize_filename_directory_traversal_windows():
697+
"""Test _sanitize_filename strips Windows-style directory traversal."""
698+
assert BedrockAgent._sanitize_filename("..\\..\\..\\Windows\\System32\\config") == "config"
699+
assert BedrockAgent._sanitize_filename("C:\\Users\\file.txt") == "file.txt"
700+
assert BedrockAgent._sanitize_filename("\\\\server\\share\\file.txt") == "file.txt"
701+
702+
703+
def test_sanitize_filename_mixed_separators():
704+
"""Test _sanitize_filename with mixed path separators."""
705+
assert BedrockAgent._sanitize_filename("../path\\to/file.txt") == "file.txt"
706+
assert BedrockAgent._sanitize_filename("..\\path/to\\file.txt") == "file.txt"
707+
708+
709+
def test_sanitize_filename_null_byte():
710+
"""Test _sanitize_filename removes null bytes."""
711+
assert BedrockAgent._sanitize_filename("file\x00.txt") == "file.txt"
712+
assert BedrockAgent._sanitize_filename("file.txt\x00.exe") == "file.txt.exe"
713+
714+
715+
def test_sanitize_filename_empty():
716+
"""Test _sanitize_filename returns empty string for empty result."""
717+
assert BedrockAgent._sanitize_filename("") == ""
718+
assert BedrockAgent._sanitize_filename("../") == ""
719+
assert BedrockAgent._sanitize_filename("..\\") == ""
720+
721+
722+
def test_sanitize_filename_only_dots():
723+
"""Test _sanitize_filename handles edge cases with dots."""
724+
# Note: os.path.basename("..") returns ".." which is kept as-is
725+
# Only "../" or "..\" patterns get stripped to empty string
726+
assert BedrockAgent._sanitize_filename(".") == "."
727+
728+
729+
def test_sanitize_filename_logs_warning(caplog):
730+
"""Test _sanitize_filename logs warning when filename is sanitized."""
731+
import logging
732+
733+
with caplog.at_level(logging.WARNING):
734+
result = BedrockAgent._sanitize_filename("../malicious/file.txt")
735+
assert result == "file.txt"
736+
assert "potentially malicious path components" in caplog.text
737+
assert "../malicious/file.txt" in caplog.text
738+
assert "file.txt" in caplog.text
739+
740+
741+
def test_sanitize_filename_no_warning_for_clean_filename(caplog):
742+
"""Test _sanitize_filename does not log warning for clean filenames."""
743+
import logging
744+
745+
with caplog.at_level(logging.WARNING):
746+
result = BedrockAgent._sanitize_filename("clean_file.txt")
747+
assert result == "clean_file.txt"
748+
assert "potentially malicious" not in caplog.text
749+
750+
751+
# endregion

python/tests/unit/agents/chat_completion/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@ def kernel_with_ai_service():
2121
mock_ai_service_client.get_chat_message_contents = AsyncMock(
2222
return_value=[ChatMessageContent(role=AuthorRole.SYSTEM, content="Processed Message")]
2323
)
24+
kernel.plugins = {} # Ensure plugins dict is initialized to avoid AttributeError during tests
2425

2526
return kernel, mock_ai_service_client

python/tests/unit/agents/chat_completion/test_chat_completion_agent.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,10 @@ async def mock_get_chat_message_contents(
279279
arguments: KernelArguments,
280280
):
281281
responses = [
282-
ChatMessageContent(role=AuthorRole.TOOL, content="Tool Call Result"),
282+
ChatMessageContent(
283+
role=AuthorRole.TOOL,
284+
items=[FunctionResultContent(result="Tool Call Result")],
285+
),
283286
]
284287
chat_history.messages.extend(responses)
285288
return responses
@@ -289,7 +292,7 @@ async def mock_get_chat_message_contents(
289292
messages = [message async for message in agent.invoke(messages="test", thread=thread)]
290293

291294
assert len(messages) == 1
292-
assert messages[0].message.content == "Tool Call Result"
295+
assert messages[0].message.items[0].result == "Tool Call Result"
293296
assert messages[0].message.role == AuthorRole.TOOL
294297
assert messages[0].message.name == "TestAgent"
295298

@@ -298,7 +301,7 @@ async def mock_get_chat_message_contents(
298301

299302
assert len(thread_messages) == 2
300303
assert thread_messages[0].content == "test"
301-
assert thread_messages[1].content == "Tool Call Result"
304+
assert thread_messages[1].items[0].result == "Tool Call Result"
302305
assert thread_messages[1].name == "TestAgent"
303306
assert thread_messages[1].role == AuthorRole.TOOL
304307

python/tests/unit/connectors/ai/azure_ai_inference/conftest.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,17 @@ def model_diagnostics_test_env(monkeypatch, exclude_list, override_env_param_dic
8585
return env_vars
8686

8787

88+
@pytest.fixture()
89+
def disabled_model_diagnostics_test_env(monkeypatch):
90+
"""Fixture to disable diagnostics for tests that use mocking.
91+
92+
This is needed because AIInferenceInstrumentor's instrument/uninstrument
93+
cycle interferes with class-level mocking of ChatCompletionsClient.complete.
94+
"""
95+
monkeypatch.setenv("SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS", "false")
96+
monkeypatch.setenv("SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE", "false")
97+
98+
8899
@pytest.fixture(scope="function")
89100
def azure_ai_inference_client(azure_ai_inference_unit_test_env, request) -> ChatCompletionsClient | EmbeddingsClient:
90101
"""Fixture to create Azure AI Inference client for unit tests."""
@@ -164,8 +175,8 @@ def mock_azure_ai_inference_chat_completion_response_with_tool_call(model_id) ->
164175
ChatCompletionsToolCall(
165176
id="test_id",
166177
function=FunctionCall(
167-
name="test_function",
168-
arguments={"test_arg": "test_value"},
178+
name="getLightStatus",
179+
arguments='{"arg1": "test_value"}',
169180
),
170181
),
171182
],
@@ -254,7 +265,7 @@ def mock_azure_ai_inference_streaming_chat_completion_response_with_tool_call(mo
254265
id="test_id",
255266
function=FunctionCall(
256267
name="getLightStatus",
257-
arguments={"arg1": "test_value"},
268+
arguments='{"arg1": "test_value"}',
258269
),
259270
),
260271
],

python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,9 +271,12 @@ async def test_azure_ai_inference_chat_completion_with_function_choice_behavior_
271271
async def test_azure_ai_inference_chat_completion_with_function_choice_behavior(
272272
mock_complete,
273273
azure_ai_inference_service,
274-
kernel,
274+
kernel: Kernel,
275275
chat_history: ChatHistory,
276276
mock_azure_ai_inference_chat_completion_response_with_tool_call,
277+
mock_azure_ai_inference_chat_completion_response,
278+
decorated_native_function,
279+
disabled_model_diagnostics_test_env,
277280
) -> None:
278281
"""Test completion of AzureAIInferenceChatCompletion with function choice behavior"""
279282
user_message_content: str = "Hello"
@@ -284,7 +287,13 @@ async def test_azure_ai_inference_chat_completion_with_function_choice_behavior(
284287
)
285288
settings.function_choice_behavior.maximum_auto_invoke_attempts = 1
286289

287-
mock_complete.return_value = mock_azure_ai_inference_chat_completion_response_with_tool_call
290+
# First call returns tool call, second call returns final response
291+
mock_complete.side_effect = [
292+
mock_azure_ai_inference_chat_completion_response_with_tool_call,
293+
mock_azure_ai_inference_chat_completion_response,
294+
]
295+
296+
kernel.add_function(plugin_name="TestPlugin", function=decorated_native_function)
288297

289298
responses = await azure_ai_inference_service.get_chat_message_contents(
290299
chat_history=chat_history,
@@ -293,13 +302,13 @@ async def test_azure_ai_inference_chat_completion_with_function_choice_behavior(
293302
arguments=KernelArguments(),
294303
)
295304

296-
# The function should be called twice:
305+
# Completion should be called twice:
297306
# One for the tool call and one for the last completion
298307
# after the maximum_auto_invoke_attempts is reached
299308
assert mock_complete.call_count == 2
300309
assert len(responses) == 1
301310
assert responses[0].role == "assistant"
302-
assert responses[0].finish_reason == FinishReason.TOOL_CALLS
311+
assert responses[0].content == "Hello"
303312

304313

305314
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)