Skip to content

Commit 328cdc6

Browse files
committed
Add constants in _smart_turn.py
1 parent 539b89b commit 328cdc6

1 file changed

Lines changed: 12 additions & 10 deletions

File tree

sdk/voice/speechmatics/voice/_smart_turn.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,12 @@ class SmartTurnDetector:
7878
Further information at https://github.com/pipecat-ai/smart-turn
7979
"""
8080

81-
WINDOW_SECONDS = 8
81+
# Constants
8282
DEFAULT_SAMPLE_RATE = 16000
83+
DEFAULT_THRESHOLD = 0.8
84+
WINDOW_SECONDS = 8
8385

84-
def __init__(self, auto_init: bool = True, threshold: float = 0.8):
86+
def __init__(self, auto_init: bool = True, threshold: float = DEFAULT_THRESHOLD):
8587
"""Create the new SmartTurnDetector.
8688
8789
Args:
@@ -164,13 +166,13 @@ def _prepare_audio(self, audio_array: bytes, sample_rate: int, sample_width: int
164166
165167
Args:
166168
audio_array: Raw audio bytes sampled at sample_rate. The function
167-
will convert the audio into float32 and truncate to 8 seconds (keeping the end)
168-
or pad to 8 seconds.
169+
will convert the audio into float32 and truncate to WINDOW_SECONDS seconds (keeping the end)
170+
or pad to WINDOW_SECONDS seconds.
169171
sample_rate: Sample rate of the audio.
170172
sample_width: Sample width of the audio.
171173
172174
Returns:
173-
Numpy array containing audio samples at 16kHz.
175+
Numpy array containing audio samples at DEFAULT_SAMPLE_RATE.
174176
"""
175177
# Convert into numpy array
176178
dtype = np.int16 if sample_width == 2 else np.int8
@@ -217,9 +219,9 @@ async def predict(
217219
"""Predict whether an audio segment is complete (turn ended) or incomplete.
218220
219221
Args:
220-
audio_array: Numpy array containing audio samples at 16kHz. The function
221-
will convert the audio into float32 and truncate to 8 seconds (keeping the end)
222-
or pad to 8 seconds.
222+
audio_array: Numpy array containing audio samples at sample_rate. The function
223+
will convert the audio into float32 and truncate to WINDOW_SECONDS seconds (keeping the end)
224+
or pad to WINDOW_SECONDS seconds.
223225
language: Language of the audio.
224226
sample_rate: Sample rate of the audio.
225227
sample_width: Sample width of the audio.
@@ -265,7 +267,7 @@ async def predict(
265267

266268
@staticmethod
267269
def truncate_audio_to_last_n_seconds(
268-
audio_array: np.ndarray, n_seconds: float = 8.0, sample_rate: int = DEFAULT_SAMPLE_RATE
270+
audio_array: np.ndarray, n_seconds: float = WINDOW_SECONDS, sample_rate: int = DEFAULT_SAMPLE_RATE
269271
) -> np.ndarray:
270272
"""Truncate audio to last n seconds or pad with zeros to meet n seconds.
271273
@@ -303,7 +305,7 @@ def download_model() -> None:
303305
If not, it will download the model from HuggingFace.
304306
"""
305307

306-
# Check if model file exists
308+
# Check if model file already exists
307309
if SmartTurnDetector.model_exists():
308310
return
309311

0 commit comments

Comments
 (0)