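Update DashScope realtime TTS: await blocking event waits via asyncio.to_thread and replace time.sleep with asyncio.sleep in retries, log WebSocket callback errors with logger.exception, declare audio/wav output for qwen3-tts-flash-realtime, add a "tts_model" type discriminator to TTSModelCard, and import IsolatedAsyncioTestCase from unittest in the middleware tests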

qbc2016 · qbc2016 · commit 2a6c5d72819e · 2026-06-10T19:25:13.000+08:00
diff --git a/src/agentscope/tts/_dashscope/_models/qwen3-tts-flash-realtime.yaml b/src/agentscope/tts/_dashscope/_models/qwen3-tts-flash-realtime.yaml
@@ -7,7 +7,7 @@ input_types:
   - text/plain
 
 output_types:
-  - audio/pcm
+  - audio/wav
 
 voices:
   # Source: https://help.aliyun.com/zh/model-studio/qwen-tts-voice-list
diff --git a/src/agentscope/tts/_dashscope/_realtime_model.py b/src/agentscope/tts/_dashscope/_realtime_model.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 """DashScope Realtime TTS model implementation."""
+import asyncio
 import base64
 import threading
-import time
 from typing import Any, AsyncGenerator, Literal, TYPE_CHECKING
 
 from pydantic import BaseModel, Field
@@ -71,9 +71,7 @@ def on_event(self, response: dict[str, Any]) -> None:
                     self.finish_event.set()
 
             except Exception:
-                import traceback
-
-                traceback.print_exc()
+                logger.exception("Error in TTS WebSocket callback")
                 self.finish_event.set()
 
         def on_close(self, close_status_code: int, close_msg: str) -> None:
@@ -148,7 +146,7 @@ async def get_audio_chunks(self) -> AsyncGenerator[TTSResponse, None]:
                 if self.chunk_event.is_set():
                     self.chunk_event.clear()
                 else:
-                    self.chunk_event.wait()
+                    await asyncio.to_thread(self.chunk_event.wait)
 
                 if self.finish_event.is_set():
                     continue
@@ -410,7 +408,9 @@ async def synthesize(
                 self._tts_client.commit()
                 self._tts_client.finish()
 
-                self._callback.finish_event.wait()
+                await asyncio.to_thread(
+                    self._callback.finish_event.wait,
+                )
 
                 if full_text and not self._callback.has_audio_data():
                     if attempt < self.max_retries - 1:
@@ -421,7 +421,7 @@ async def synthesize(
                             self.max_retries,
                             delay,
                         )
-                        time.sleep(delay)
+                        await asyncio.sleep(delay)
                         await self._reconnect()
                         unsent = full_text
                         delay *= 2
@@ -441,7 +441,7 @@ async def synthesize(
                         self.max_retries,
                         delay,
                     )
-                    time.sleep(delay)
+                    await asyncio.sleep(delay)
                     await self._reconnect()
                     unsent = full_text
                     delay *= 2
diff --git a/src/agentscope/tts/_tts_model_card.py b/src/agentscope/tts/_tts_model_card.py
@@ -11,6 +11,9 @@
 class TTSModelCard(BaseModel):
     """The model card class for TTS models."""
 
+    type: Literal["tts_model"] = "tts_model"
+    """The model card type discriminator."""
+
     name: str = Field(description="The name of the TTS model")
     """The model name."""
 
diff --git a/tests/tts_middleware_test.py b/tests/tts_middleware_test.py
@@ -2,7 +2,7 @@
 """Unit tests for TTSMiddleware."""
 import base64
 from typing import Any, AsyncGenerator
-from unittest.async_case import IsolatedAsyncioTestCase
+from unittest import IsolatedAsyncioTestCase
 from unittest.mock import AsyncMock, MagicMock
 
 from agentscope.event import (