Skip to content

Commit c4d126d

Browse files
authored
refactor: sanitize log messages (#56)
- Filter out common password and token fields.
- Use the same message arrow directions as in the Core and integration-node-library.
- Also sanitize the incoming WS messages.

Use the same log filter as in the integration-node-library. OAuth2-based fields are not yet implemented: these messages are not yet available in the Python integration library.
1 parent fe7e66b commit c4d126d

3 files changed

Lines changed: 129 additions & 48 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ _Changes in the next release_
1414

1515
### Changed
1616
- Improved WS msg processing with dedicated consumer, producer and router tasks with asyncio queues ([#47](https://github.com/unfoldedcircle/integration-python-library/pull/47)).
17+
- Sanitize log messages to prevent sensitive information exposure.
1718
- Updated GitHub actions.
1819

1920
---

tests/test_api.py

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
11
import unittest
22
from copy import deepcopy
33

4-
from ucapi.api import filter_log_msg_data
4+
from ucapi.api import sanitize_json_message
55
from ucapi.media_player import Attributes
66

77

8-
class TestFilterLogMsgData(unittest.TestCase):
8+
class TestSanitizeJsonMessage(unittest.TestCase):
99

1010
def test_no_modification_when_no_msg_data(self):
1111
data = {}
12-
result = filter_log_msg_data(data)
12+
result = sanitize_json_message(data)
1313
self.assertEqual(result, {}, "The result should be an empty dictionary")
1414

1515
def test_no_changes_when_media_image_url_not_present(self):
1616
data = {"msg_data": {"attributes": {"state": "playing", "volume": 50}}}
1717
original = deepcopy(data)
1818

19-
result = filter_log_msg_data(data)
19+
result = sanitize_json_message(data)
2020

2121
self.assertEqual(
2222
result,
@@ -36,9 +36,9 @@ def test_filtering_media_image_url_in_dict(self):
3636
expected_result = deepcopy(data)
3737
expected_result["msg_data"]["attributes"][
3838
Attributes.MEDIA_IMAGE_URL
39-
] = "data:***"
39+
] = "data:..."
4040

41-
result = filter_log_msg_data(data)
41+
result = sanitize_json_message(data)
4242

4343
self.assertEqual(
4444
result, expected_result, "The MEDIA_IMAGE_URL attribute should be filtered"
@@ -65,12 +65,12 @@ def test_filtering_media_image_url_in_list(self):
6565
expected_result = deepcopy(data)
6666
expected_result["msg_data"][0]["attributes"][
6767
Attributes.MEDIA_IMAGE_URL
68-
] = "data:***"
68+
] = "data:..."
6969
expected_result["msg_data"][1]["attributes"][
7070
Attributes.MEDIA_IMAGE_URL
71-
] = "data:***"
71+
] = "data:..."
7272

73-
result = filter_log_msg_data(data)
73+
result = sanitize_json_message(data)
7474

7575
self.assertEqual(
7676
result,
@@ -88,8 +88,48 @@ def test_input_is_not_modified(self):
8888
}
8989
original_data = deepcopy(data)
9090

91-
filter_log_msg_data(data)
91+
sanitize_json_message(data)
9292

9393
self.assertEqual(
9494
data, original_data, "The input data should not be modified by the function"
9595
)
96+
97+
def test_generic_sensitive_keys_redaction(self):
98+
sensitive_keys = [
99+
"token",
100+
"token_id",
101+
"access_token",
102+
"refresh_token",
103+
"id_token",
104+
"authorization_code",
105+
"client_secret",
106+
"secret",
107+
"auth_url",
108+
"client_data",
109+
"password",
110+
]
111+
112+
for key in sensitive_keys:
113+
msg = {key: "sensitive-value", "other": "public-value"}
114+
sanitized = sanitize_json_message(msg)
115+
self.assertEqual(
116+
sanitized[key], "***REDACTED***", f"{key} should be redacted"
117+
)
118+
self.assertEqual(
119+
sanitized["other"], "public-value", "public fields should remain intact"
120+
)
121+
122+
def test_recursive_redaction(self):
123+
msg = {
124+
"level1": {
125+
"token": "secret1",
126+
"level2": {"secret": "secret2", "public": "data"},
127+
},
128+
"array": [{"refresh_token": "secret3"}, "plain-string"],
129+
}
130+
sanitized = sanitize_json_message(msg)
131+
self.assertEqual(sanitized["level1"]["token"], "***REDACTED***")
132+
self.assertEqual(sanitized["level1"]["level2"]["secret"], "***REDACTED***")
133+
self.assertEqual(sanitized["level1"]["level2"]["public"], "data")
134+
self.assertEqual(sanitized["array"][0]["refresh_token"], "***REDACTED***")
135+
self.assertEqual(sanitized["array"][1], "plain-string")

ucapi/api.py

Lines changed: 78 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ async def _enqueue_ws_payload(self, websocket, payload: dict[str, Any]) -> None:
389389

390390
if _LOG.isEnabledFor(logging.DEBUG):
391391
_LOG.debug(
392-
"[%s] ->: %s", websocket.remote_address, filter_log_msg_data(payload)
392+
"[%s] <-: %s", websocket.remote_address, sanitize_json_message(payload)
393393
)
394394

395395
match payload.get("kind"):
@@ -513,7 +513,10 @@ async def _send_ws_event(
513513
await self._enqueue_ws_payload(websocket, data)
514514

515515
async def _process_ws_message(self, websocket, data: dict[str, Any]) -> None:
516-
_LOG.debug("[%s] <-: %s", websocket.remote_address, data)
516+
if _LOG.isEnabledFor(logging.DEBUG):
517+
_LOG.debug(
518+
"[%s] ->: %s", websocket.remote_address, sanitize_json_message(data)
519+
)
517520

518521
kind = data["kind"]
519522
req_id = data.get("id")
@@ -630,7 +633,7 @@ async def _process_ws_binary_message(self, websocket, data: bytes) -> None:
630633
"""
631634
if _LOG.isEnabledFor(logging.DEBUG):
632635
_LOG.debug(
633-
"[%s] <-: <binary %d bytes>", websocket.remote_address, len(data)
636+
"[%s] ->: <binary %d bytes>", websocket.remote_address, len(data)
634637
)
635638

636639
# Parse IntegrationMessage from bytes
@@ -1777,46 +1780,83 @@ def local_hostname() -> str:
17771780
)
17781781

17791782

1780-
def filter_log_msg_data(data: dict[str, Any]) -> dict[str, Any]:
1783+
_REDACTED_VALUE = "***REDACTED***"
1784+
_SENSITIVE_KEYS = {
1785+
"token",
1786+
"token_id",
1787+
"access_token",
1788+
"refresh_token",
1789+
"id_token",
1790+
"authorization_code",
1791+
"client_secret",
1792+
"secret",
1793+
"auth_url",
1794+
"client_data",
1795+
"password",
1796+
}
1797+
1798+
1799+
def _filter_base64_images(json_data: Any) -> Any:
1800+
"""
1801+
Filter out base64 encoded images from a JSON object.
1802+
1803+
**Attention:** the provided JSON object is modified in-place!
1804+
1805+
:param json_data: The JSON object to filter.
1806+
:returns: The filtered JSON object.
17811807
"""
1782-
Filter attribute fields to exclude for log messages in the given msg data dict.
1808+
if json_data and isinstance(json_data, dict) and "msg_data" in json_data:
1809+
msg_data = json_data["msg_data"]
1810+
if isinstance(msg_data, list):
1811+
for item in msg_data:
1812+
if (
1813+
isinstance(item, dict)
1814+
and "attributes" in item
1815+
and isinstance(item["attributes"], dict)
1816+
and item["attributes"]
1817+
.get(MediaAttr.MEDIA_IMAGE_URL, "")
1818+
.startswith("data:")
1819+
):
1820+
item["attributes"][MediaAttr.MEDIA_IMAGE_URL] = "data:..."
1821+
elif (
1822+
isinstance(msg_data, dict)
1823+
and "attributes" in msg_data
1824+
and isinstance(msg_data["attributes"], dict)
1825+
and msg_data["attributes"]
1826+
.get(MediaAttr.MEDIA_IMAGE_URL, "")
1827+
.startswith("data:")
1828+
):
1829+
msg_data["attributes"][MediaAttr.MEDIA_IMAGE_URL] = "data:..."
1830+
return json_data
17831831

1784-
- Attributes are filtered in `data["msg_data"]`:
1785-
- dict object: key `attributes`
1786-
- list object: every list item `attributes`
1787-
- Filtered attributes: `MEDIA_IMAGE_URL`
17881832

1789-
:param data: the message data dict
1790-
:return: copy of the message data dict with filtered attributes
1833+
def sanitize_json_message(data: Any) -> Any:
1834+
"""
1835+
Sanitizes a JSON message by redacting sensitive fields such as tokens and secrets.
1836+
1837+
Base64 encoded images starting with `data:` are replaced with `data:...` in `msg_data.attributes.media_image_url`
1838+
fields to limit log output.
1839+
1840+
The original message is not modified; the returned redacted message is a deep copy.
1841+
1842+
:param data: The JSON object to be sanitized.
1843+
:return: The sanitized JSON object with sensitive information redacted.
17911844
"""
17921845
# do not modify the original dict
1793-
log_upd = deepcopy(data)
1794-
if not log_upd:
1846+
json_upd = deepcopy(data)
1847+
if not json_upd:
17951848
return {}
17961849

1797-
# filter out base64 encoded images in the media player's media_image_url attribute
1798-
if "msg_data" in log_upd:
1799-
if (
1800-
"attributes" in log_upd["msg_data"]
1801-
and MediaAttr.MEDIA_IMAGE_URL in log_upd["msg_data"]["attributes"]
1802-
and (
1803-
media_image_url := log_upd["msg_data"]["attributes"][
1804-
MediaAttr.MEDIA_IMAGE_URL
1805-
]
1806-
)
1807-
and media_image_url.startswith("data:")
1808-
):
1809-
log_upd["msg_data"]["attributes"][MediaAttr.MEDIA_IMAGE_URL] = "data:***"
1810-
elif isinstance(log_upd["msg_data"], list):
1811-
for item in log_upd["msg_data"]:
1812-
if (
1813-
"attributes" in item
1814-
and MediaAttr.MEDIA_IMAGE_URL in item["attributes"]
1815-
and (
1816-
media_image_url := item["attributes"][MediaAttr.MEDIA_IMAGE_URL]
1817-
)
1818-
and media_image_url.startswith("data:")
1819-
):
1820-
item["attributes"][MediaAttr.MEDIA_IMAGE_URL] = "data:***"
1850+
def sanitize_for_logging(value: Any) -> Any:
1851+
if value and isinstance(value, (dict, list)):
1852+
if isinstance(value, list):
1853+
return [sanitize_for_logging(item) for item in value]
1854+
1855+
for k, v in value.items():
1856+
if k in _SENSITIVE_KEYS:
1857+
value[k] = _REDACTED_VALUE
1858+
else:
1859+
value[k] = sanitize_for_logging(v)
1860+
return value
18211861

1822-
return log_upd
1862+
return sanitize_for_logging(_filter_base64_images(json_upd))

0 commit comments

Comments
 (0)