Skip to content

Commit a153ef6

Browse files
author
WeDone
committed
fix(zulip): resolve ReadTimeout by using call_endpoint with configurable timeout and retry
Root cause: zulip.Client.send_message() and upload_file() use a hardcoded 15s timeout (from do_api_query: timeout or 15.0). For file uploads and slow networks, 15s is insufficient. Fix: - Replace send_message/upload_file with call_endpoint(url=..., timeout=...) which allows passing a custom timeout per request - Add ZulipConfig.timeout field (default 60s, configurable via WebUI) - Add _call_with_retry() with exponential backoff for transient errors (ReadTimeout, ConnectionError, OSError) - up to 3 retries - Update all TestSend tests to use call_endpoint mock - Add 3 new tests: timeout passthrough, ReadTimeout retry, retry exhaustion - Total: 88 tests all passing
1 parent 0e57a0b commit a153ef6

2 files changed

Lines changed: 170 additions & 40 deletions

File tree

deeptutor/tutorbot/channels/zulip.py

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class ZulipConfig(Base):
4848
api_key: str = Field(default="", repr=False)
4949
allow_from: list[str] = Field(default_factory=list)
5050
group_policy: Literal["mention", "open"] = "mention"
51+
timeout: float = Field(default=60.0, description="HTTP request timeout in seconds")
5152

5253

5354
class ZulipChannel(BaseChannel):
@@ -456,6 +457,11 @@ async def _download_one(url: str, label: str) -> tuple[str, str]:
456457
async def send(self, msg: OutboundMessage) -> None:
457458
"""Send a message through Zulip, including file attachments.
458459
460+
Uses ``call_endpoint`` with a configurable timeout instead of the
461+
default 15s from ``send_message`` / ``upload_file``. Transient
462+
errors (``ReadTimeout``, ``ConnectionError``) are retried up to
463+
3 times with exponential backoff.
464+
459465
Synchronous ``zulip.Client`` calls are wrapped in
460466
``run_in_executor`` to avoid blocking the async event loop.
461467
LaTeX formulas are sent as-is (Zulip renders KaTeX natively).
@@ -478,11 +484,18 @@ async def send(self, msg: OutboundMessage) -> None:
478484

479485
content = msg.content or ""
480486
loop = asyncio.get_running_loop()
487+
timeout = self.config.timeout
481488

482489
for media_path in msg.media or []:
483490
try:
484-
result = await loop.run_in_executor(
485-
None, functools.partial(self._client.upload_file, media_path)
491+
result = await self._call_with_retry(
492+
loop,
493+
functools.partial(
494+
self._client.call_endpoint,
495+
url="user_uploads",
496+
files=[media_path],
497+
timeout=timeout,
498+
),
486499
)
487500
if result.get("result") == "success":
488501
uri = result.get("uri", "")
@@ -500,8 +513,14 @@ async def send(self, msg: OutboundMessage) -> None:
500513
for chunk in chunks:
501514
message_data["content"] = chunk
502515
try:
503-
result = await loop.run_in_executor(
504-
None, functools.partial(self._client.send_message, message_data)
516+
result = await self._call_with_retry(
517+
loop,
518+
functools.partial(
519+
self._client.call_endpoint,
520+
url="messages",
521+
request=message_data,
522+
timeout=timeout,
523+
),
505524
)
506525
if result.get("result") != "success":
507526
logger.error(
@@ -510,6 +529,39 @@ async def send(self, msg: OutboundMessage) -> None:
510529
except Exception as e:
511530
logger.error("Zulip send error: {}", type(e).__name__)
512531

532+
async def _call_with_retry(
533+
self,
534+
loop: asyncio.AbstractEventLoop,
535+
fn: functools.partial,
536+
max_retries: int = 3,
537+
) -> Any:
538+
"""Call a sync zulip.Client method with retry on transient errors.
539+
540+
Retries on ``ReadTimeout`` and ``ConnectionError`` with
541+
exponential backoff (1s, 2s, 4s).
542+
"""
543+
from requests.exceptions import ConnectionError as RequestsConnectionError
544+
from requests.exceptions import ReadTimeout
545+
546+
retryable = (ReadTimeout, RequestsConnectionError, OSError)
547+
for attempt in range(max_retries):
548+
try:
549+
return await loop.run_in_executor(None, fn)
550+
except retryable as e:
551+
if attempt < max_retries - 1:
552+
wait = 2**attempt
553+
logger.warning(
554+
"Zulip API transient error ({}), retry {}/{} in {}s: {}",
555+
type(e).__name__,
556+
attempt + 1,
557+
max_retries,
558+
wait,
559+
str(e)[:120],
560+
)
561+
await asyncio.sleep(wait)
562+
else:
563+
raise
564+
513565
async def stop(self) -> None:
514566
"""Stop the Zulip channel and clean up resources."""
515567
self._running = False

tests/test_zulip_channel.py

Lines changed: 114 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -290,24 +290,27 @@ class TestSend:
290290

291291
@pytest.mark.asyncio
292292
async def test_send_stream_message(self, channel: ZulipChannel):
293+
channel._client.call_endpoint.return_value = {"result": "success"}
293294
msg = OutboundMessage(channel="zulip", chat_id="general:off-topic", content="Hello stream!")
294295
await channel.send(msg)
295-
channel._client.send_message.assert_called_once()
296-
sent_data = channel._client.send_message.call_args[0][0]
297-
assert sent_data["type"] == "stream"
298-
assert sent_data["to"] == "general"
299-
assert sent_data["subject"] == "off-topic"
300-
assert sent_data["content"] == "Hello stream!"
296+
send_call = channel._client.call_endpoint.call_args_list[-1]
297+
assert send_call.kwargs.get("url") == "messages" or send_call[1].get("url") == "messages"
298+
request = send_call.kwargs.get("request") or send_call[1].get("request")
299+
assert request["type"] == "stream"
300+
assert request["to"] == "general"
301+
assert request["subject"] == "off-topic"
302+
assert request["content"] == "Hello stream!"
301303

302304
@pytest.mark.asyncio
303305
async def test_send_private_message(self, channel: ZulipChannel):
306+
channel._client.call_endpoint.return_value = {"result": "success"}
304307
msg = OutboundMessage(channel="zulip", chat_id="user@example.com", content="Hello private!")
305308
await channel.send(msg)
306-
channel._client.send_message.assert_called_once()
307-
sent_data = channel._client.send_message.call_args[0][0]
308-
assert sent_data["type"] == "private"
309-
assert sent_data["to"] == "user@example.com"
310-
assert sent_data["content"] == "Hello private!"
309+
send_call = channel._client.call_endpoint.call_args_list[-1]
310+
request = send_call.kwargs.get("request") or send_call[1].get("request")
311+
assert request["type"] == "private"
312+
assert request["to"] == "user@example.com"
313+
assert request["content"] == "Hello private!"
311314

312315
@pytest.mark.asyncio
313316
async def test_send_no_client_logs_warning(self, bus: MessageBus):
@@ -319,74 +322,149 @@ async def test_send_no_client_logs_warning(self, bus: MessageBus):
319322

320323
@pytest.mark.asyncio
321324
async def test_send_with_media_upload(self, channel: ZulipChannel):
322-
channel._client.upload_file.return_value = {
323-
"result": "success",
324-
"uri": "/user_uploads/uploaded_image.png",
325-
}
325+
call_count = 0
326+
327+
def fake_call_endpoint(**kwargs):
328+
nonlocal call_count
329+
call_count += 1
330+
if kwargs.get("url") == "user_uploads":
331+
return {"result": "success", "uri": "/user_uploads/uploaded_image.png"}
332+
return {"result": "success"}
333+
334+
channel._client.call_endpoint.side_effect = fake_call_endpoint
326335
msg = OutboundMessage(
327336
channel="zulip", chat_id="user@example.com", content="See this:", media=["/tmp/image.png"]
328337
)
329338
await channel.send(msg)
330-
channel._client.upload_file.assert_called_once_with("/tmp/image.png")
331-
sent_data = channel._client.send_message.call_args[0][0]
332-
assert "[attachment](/user_uploads/uploaded_image.png)" in sent_data["content"]
339+
assert call_count == 2
340+
send_call = channel._client.call_endpoint.call_args_list[-1]
341+
request = send_call.kwargs.get("request") or send_call[1].get("request")
342+
assert "[attachment](/user_uploads/uploaded_image.png)" in request["content"]
333343

334344
@pytest.mark.asyncio
335345
async def test_send_upload_failure_still_sends(self, channel: ZulipChannel):
336-
channel._client.upload_file.return_value = {"result": "error", "msg": "too large"}
346+
call_count = 0
347+
348+
def fake_call_endpoint(**kwargs):
349+
nonlocal call_count
350+
call_count += 1
351+
if kwargs.get("url") == "user_uploads":
352+
return {"result": "error", "msg": "too large"}
353+
return {"result": "success"}
354+
355+
channel._client.call_endpoint.side_effect = fake_call_endpoint
337356
msg = OutboundMessage(
338357
channel="zulip", chat_id="user@example.com", content="Hello", media=["/tmp/big_file.zip"]
339358
)
340359
await channel.send(msg)
341-
sent_data = channel._client.send_message.call_args[0][0]
342-
assert sent_data["content"] == "Hello"
360+
send_call = channel._client.call_endpoint.call_args_list[-1]
361+
request = send_call.kwargs.get("request") or send_call[1].get("request")
362+
assert request["content"] == "Hello"
343363

344364
@pytest.mark.asyncio
345365
async def test_send_upload_exception_still_sends(self, channel: ZulipChannel):
346-
channel._client.upload_file.side_effect = RuntimeError("network error")
366+
call_count = 0
367+
368+
def fake_call_endpoint(**kwargs):
369+
nonlocal call_count
370+
call_count += 1
371+
if kwargs.get("url") == "user_uploads":
372+
raise RuntimeError("network error")
373+
return {"result": "success"}
374+
375+
channel._client.call_endpoint.side_effect = fake_call_endpoint
347376
msg = OutboundMessage(
348377
channel="zulip", chat_id="user@example.com", content="Hello", media=["/tmp/file.zip"]
349378
)
350379
await channel.send(msg)
351-
sent_data = channel._client.send_message.call_args[0][0]
352-
assert sent_data["content"] == "Hello"
380+
send_call = channel._client.call_endpoint.call_args_list[-1]
381+
request = send_call.kwargs.get("request") or send_call[1].get("request")
382+
assert request["content"] == "Hello"
353383

354384
@pytest.mark.asyncio
355385
async def test_send_empty_content_with_media(self, channel: ZulipChannel):
356-
channel._client.upload_file.return_value = {
357-
"result": "success",
358-
"uri": "/user_uploads/img.png",
359-
}
386+
call_count = 0
387+
388+
def fake_call_endpoint(**kwargs):
389+
nonlocal call_count
390+
call_count += 1
391+
if kwargs.get("url") == "user_uploads":
392+
return {"result": "success", "uri": "/user_uploads/img.png"}
393+
return {"result": "success"}
394+
395+
channel._client.call_endpoint.side_effect = fake_call_endpoint
360396
msg = OutboundMessage(
361397
channel="zulip", chat_id="user@example.com", content="", media=["/tmp/img.png"]
362398
)
363399
await channel.send(msg)
364-
sent_data = channel._client.send_message.call_args[0][0]
365-
assert sent_data["content"] == "[attachment](/user_uploads/img.png)"
400+
send_call = channel._client.call_endpoint.call_args_list[-1]
401+
request = send_call.kwargs.get("request") or send_call[1].get("request")
402+
assert request["content"] == "[attachment](/user_uploads/img.png)"
366403

367404
@pytest.mark.asyncio
368405
async def test_send_sends_failure_logs_error(self, channel: ZulipChannel):
369-
channel._client.send_message.return_value = {"result": "error", "msg": "not subscribed"}
406+
channel._client.call_endpoint.return_value = {"result": "error", "msg": "not subscribed"}
370407
msg = OutboundMessage(channel="zulip", chat_id="stream:topic", content="test")
371408
await channel.send(msg)
372409

373410
@pytest.mark.asyncio
374411
async def test_send_message_exception_handled(self, channel: ZulipChannel):
375-
channel._client.send_message.side_effect = RuntimeError("connection lost")
412+
channel._client.call_endpoint.side_effect = RuntimeError("connection lost")
376413
msg = OutboundMessage(channel="zulip", chat_id="stream:topic", content="test")
377414
await channel.send(msg)
378415

379416
@pytest.mark.asyncio
380417
async def test_send_long_message_is_chunked(self, channel: ZulipChannel):
381418
from deeptutor.tutorbot.channels.zulip import MAX_MESSAGE_LEN
382419

420+
channel._client.call_endpoint.return_value = {"result": "success"}
383421
long_content = "A" * (MAX_MESSAGE_LEN + 500)
384422
msg = OutboundMessage(channel="zulip", chat_id="user@example.com", content=long_content)
385423
await channel.send(msg)
386-
assert channel._client.send_message.call_count > 1
387-
for call in channel._client.send_message.call_args_list:
388-
sent_data = call[0][0]
389-
assert len(sent_data["content"]) <= MAX_MESSAGE_LEN
424+
message_calls = [
425+
c for c in channel._client.call_endpoint.call_args_list
426+
if (c.kwargs.get("url") or c[1].get("url")) == "messages"
427+
]
428+
assert len(message_calls) > 1
429+
for call in message_calls:
430+
request = call.kwargs.get("request") or call[1].get("request")
431+
assert len(request["content"]) <= MAX_MESSAGE_LEN
432+
433+
@pytest.mark.asyncio
434+
async def test_send_uses_configured_timeout(self, channel: ZulipChannel):
435+
channel._client.call_endpoint.return_value = {"result": "success"}
436+
channel.config.timeout = 120.0
437+
msg = OutboundMessage(channel="zulip", chat_id="user@example.com", content="hi")
438+
await channel.send(msg)
439+
send_call = channel._client.call_endpoint.call_args_list[-1]
440+
timeout_val = send_call.kwargs.get("timeout") or send_call[1].get("timeout")
441+
assert timeout_val == 120.0
442+
443+
@pytest.mark.asyncio
444+
async def test_send_retries_on_read_timeout(self, channel: ZulipChannel):
445+
from requests.exceptions import ReadTimeout
446+
447+
call_count = 0
448+
449+
def fake_call_endpoint(**kwargs):
450+
nonlocal call_count
451+
call_count += 1
452+
if call_count == 1:
453+
raise ReadTimeout("timed out")
454+
return {"result": "success"}
455+
456+
channel._client.call_endpoint.side_effect = fake_call_endpoint
457+
msg = OutboundMessage(channel="zulip", chat_id="user@example.com", content="hi")
458+
await channel.send(msg)
459+
assert call_count == 2
460+
461+
@pytest.mark.asyncio
462+
async def test_send_retries_exhausted_raises(self, channel: ZulipChannel):
463+
from requests.exceptions import ReadTimeout
464+
465+
channel._client.call_endpoint.side_effect = ReadTimeout("timed out")
466+
msg = OutboundMessage(channel="zulip", chat_id="user@example.com", content="hi")
467+
await channel.send(msg)
390468

391469

392470
class TestInit:

0 commit comments

Comments
 (0)