fix(message): rate-limit "Could not parse" warnings per NAME (#1566)

Beennnn · edenhaus · web-flow · commit f87b6c995650 · 2026-05-08T00:36:54.000+02:00
* fix(message): rate-limit "Could not parse" warnings per NAME When a firmware version pushes messages whose on-wire format the lib does not yet support, every push triggers a `Could not parse <NAME>` WARNING via `_handle_error_or_analyse`. In one observed case a mower's spontaneous `getMapTrace` pushes produced 217 520 identical entries in 3 days (~70k/day, ~50/min during mowing), bloating the parent process' log file and any downstream sink (HA recorder DB, Loki, etc.). The first few warnings are useful — they let an operator notice that something is wrong. Beyond that, identical repeats add no information and only make the rest of the log harder to read. This adds a per-NAME counter and a threshold (default 3): the first 3 parse failures for a given message NAME are still logged at WARNING, followed by a single notice that further entries will be downgraded to DEBUG. After that, all subsequent parse failures for that NAME log at DEBUG. Counters are isolated per NAME, so a NEW message class hitting a parse error still surfaces normally. The 3 existing emission sites (`_handle_error_or_analyse` exception path, `_handle_error_or_analyse` ERROR-state path, `MessageBodyData` exception path) all route through the new helper. Behaviour for non-throttled flows (success, ANALYSE) is unchanged. Tests: - `test_warn_once_throttles_repeated_parse_failures` — calls `Message.handle()` `THRESHOLD + 2` times on a class whose `_handle` always raises, asserts THRESHOLD + 1 WARNINGs (incl. the "switching to DEBUG" notice) followed by 2 DEBUGs. - `test_warn_once_isolated_per_message_name` — burns through the threshold for one NAME, verifies a sibling NAME still WARNs on its first parse failure. - Full suite: 699/699 pass. Refs: #1376 * review: drop verbose comments, full counter reset via fixture, exact log assertions Address review feedback on #1566: - Remove the file-level explanatory comment block above the threshold - Drop the "Restart Home Assistant" line — the lib does not know its host - Replace per-NAME counter cleanup with a pytest fixture that wipes the whole dict before and after each test - Compare ``caplog.records`` against the full expected level/message list in order (the previous count-only assertion would have passed with WARNING/DEBUG entries interleaved out of order) * Refactor tests --------- Co-authored-by: Robert Resch <robert@resch.dev>
diff --git a/deebot_client/message.py b/deebot_client/message.py
@@ -25,6 +25,23 @@
 
 MessagePayloadType = str | bytes | bytearray | dict[str, Any]
 
+_PARSE_FAILURE_THRESHOLD = 3
+_parse_failure_counts: dict[str, int] = {}
+
+
+def _log_parse_failure(name: str, data: object, *, exc_info: bool = False) -> None:
+    count = _parse_failure_counts.get(name, 0) + 1
+    _parse_failure_counts[name] = count
+    if count <= _PARSE_FAILURE_THRESHOLD:
+        _LOGGER.warning("Could not parse %s: %s", name, data, exc_info=exc_info)
+        if count == _PARSE_FAILURE_THRESHOLD:
+            _LOGGER.warning(
+                "Further 'Could not parse %s' entries will be logged at DEBUG level",
+                name,
+            )
+    else:
+        _LOGGER.debug("Could not parse %s: %s", name, data, exc_info=exc_info)
+
 
 class HandlingState(IntEnum):
     """Handling state enum."""
@@ -65,7 +82,7 @@ def wrapper(cls: type[M], event_bus: EventBus, data: T) -> HandlingResult:
         try:
             response = func(cls, event_bus, data)
         except Exception:
-            _LOGGER.warning("Could not parse %s: %s", cls.NAME, data, exc_info=True)
+            _log_parse_failure(cls.NAME, data, exc_info=True)
             return HandlingResult(HandlingState.ERROR)
         else:
             # This happens if for some reason someone calls super() of an ABC where handle is not implemented
@@ -81,7 +98,7 @@ def wrapper(cls: type[M], event_bus: EventBus, data: T) -> HandlingResult:
                 _LOGGER.debug("Could not handle %s message: %s", cls.NAME, data)
                 return HandlingResult(HandlingState.ANALYSE_LOGGED, response.args)
             if response.state == HandlingState.ERROR:
-                _LOGGER.warning("Could not parse %s: %s", cls.NAME, data)
+                _log_parse_failure(cls.NAME, data)
             return response
 
     return wrapper
@@ -256,7 +273,7 @@ def __handle_body_data(
         try:
             response = cls._handle_body_data(event_bus, data)
         except Exception:
-            _LOGGER.warning("Could not parse %s: %s", cls.NAME, data, exc_info=True)
+            _log_parse_failure(cls.NAME, data, exc_info=True)
             return HandlingResult(HandlingState.ERROR)
         else:
             if response.state == HandlingState.ANALYSE:
diff --git a/tests/test_message.py b/tests/test_message.py
@@ -1,11 +1,14 @@
 from __future__ import annotations
 
+import logging
 from unittest.mock import Mock
 
 import pytest
 
+from deebot_client import message as message_module
 from deebot_client.event_bus import EventBus
 from deebot_client.message import (
+    _PARSE_FAILURE_THRESHOLD,
     HandlingResult,
     HandlingState,
     Message,
@@ -67,3 +70,125 @@ def test_WronglyImplementedMessage() -> None:
     result = WronglyImplementedMessage.handle(event_bus, {})
 
     assert result.state == HandlingState.ERROR
+
+
+@pytest.fixture
+def _reset_parse_failure_counts() -> None:
+    message_module._parse_failure_counts.clear()
+
+
+class _AlwaysRaisingMessage(Message):
+    NAME = "AlwaysRaisingMessage_warnonce_test"
+
+    @classmethod
+    def _handle(
+        cls, _event_bus: EventBus, _message: MessagePayloadType
+    ) -> HandlingResult:
+        raise ValueError("simulated parse failure")
+
+
+class _OtherFailingMessage(Message):
+    NAME = "OtherFailingMessage_warnonce_test"
+
+    @classmethod
+    def _handle(
+        cls, _event_bus: EventBus, _message: MessagePayloadType
+    ) -> HandlingResult:
+        raise ValueError("simulated parse failure")
+
+
+@pytest.mark.usefixtures("_reset_parse_failure_counts")
+def test_warn_once_throttles_repeated_parse_failures(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    name = _AlwaysRaisingMessage.NAME
+    event_bus = Mock(spec_set=EventBus)
+    logger_name = "deebot_client.message"
+
+    with caplog.at_level(logging.DEBUG, logger=logger_name):
+        for i in range(_PARSE_FAILURE_THRESHOLD + 2):
+            assert (
+                _AlwaysRaisingMessage.handle(event_bus, {"i": i}).state
+                == HandlingState.ERROR
+            )
+
+    assert [
+        (
+            logger_name,
+            logging.WARNING,
+            f"Could not parse {name}: {{'i': 0}}",
+        ),
+        (
+            logger_name,
+            logging.WARNING,
+            f"Could not parse {name}: {{'i': 1}}",
+        ),
+        (
+            logger_name,
+            logging.WARNING,
+            f"Could not parse {name}: {{'i': 2}}",
+        ),
+        (
+            logger_name,
+            logging.WARNING,
+            f"Further 'Could not parse {name}' entries will be logged at DEBUG level",
+        ),
+        (
+            logger_name,
+            logging.DEBUG,
+            f"Could not parse {name}: {{'i': 3}}",
+        ),
+        (
+            logger_name,
+            logging.DEBUG,
+            f"Could not parse {name}: {{'i': 4}}",
+        ),
+    ] == caplog.record_tuples
+
+
+@pytest.mark.usefixtures("_reset_parse_failure_counts")
+def test_warn_once_isolated_per_message_name(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    name_a = _AlwaysRaisingMessage.NAME
+    name_b = _OtherFailingMessage.NAME
+    event_bus = Mock(spec_set=EventBus)
+    logger_name = "deebot_client.message"
+
+    with caplog.at_level(logging.DEBUG, logger=logger_name):
+        for i in range(_PARSE_FAILURE_THRESHOLD + 1):
+            _AlwaysRaisingMessage.handle(event_bus, {"a": i})
+        _OtherFailingMessage.handle(event_bus, {"b": "first"})
+
+    assert [
+        (
+            logger_name,
+            logging.WARNING,
+            f"Could not parse {name_a}: {{'a': 0}}",
+        ),
+        (
+            logger_name,
+            logging.WARNING,
+            f"Could not parse {name_a}: {{'a': 1}}",
+        ),
+        (
+            logger_name,
+            logging.WARNING,
+            f"Could not parse {name_a}: {{'a': 2}}",
+        ),
+        (
+            logger_name,
+            logging.WARNING,
+            f"Further 'Could not parse {name_a}' entries will be logged at DEBUG level",
+        ),
+        (
+            logger_name,
+            logging.DEBUG,
+            f"Could not parse {name_a}: {{'a': 3}}",
+        ),
+        (
+            logger_name,
+            logging.WARNING,
+            f"Could not parse {name_b}: {{'b': 'first'}}",
+        ),
+    ] == caplog.record_tuples