@@ -78,10 +78,12 @@ class SmartTurnDetector:
7878 Further information at https://github.com/pipecat-ai/smart-turn
7979 """
8080
81- WINDOW_SECONDS = 8
81+ # Constants
8282 DEFAULT_SAMPLE_RATE = 16000
83+ DEFAULT_THRESHOLD = 0.8
84+ WINDOW_SECONDS = 8
8385
84- def __init__ (self , auto_init : bool = True , threshold : float = 0.8 ):
86+ def __init__ (self , auto_init : bool = True , threshold : float = DEFAULT_THRESHOLD ):
8587 """Create the new SmartTurnDetector.
8688
8789 Args:
@@ -164,13 +166,13 @@ def _prepare_audio(self, audio_array: bytes, sample_rate: int, sample_width: int
164166
165167 Args:
166168 audio_array: Raw audio bytes sampled at sample_rate. The function
167- will convert the audio into float32 and truncate to 8 seconds (keeping the end)
168- or pad to 8 seconds.
169+ will convert the audio into float32 and truncate to WINDOW_SECONDS seconds (keeping the end)
170+ or pad to WINDOW_SECONDS seconds.
169171 sample_rate: Sample rate of the audio.
170172 sample_width: Sample width of the audio.
171173
172174 Returns:
173- Numpy array containing audio samples at 16kHz .
175+ Numpy array containing audio samples at DEFAULT_SAMPLE_RATE .
174176 """
175177 # Convert into numpy array
176178 dtype = np .int16 if sample_width == 2 else np .int8
@@ -217,9 +219,9 @@ async def predict(
217219 """Predict whether an audio segment is complete (turn ended) or incomplete.
218220
219221 Args:
220- audio_array: Numpy array containing audio samples at 16kHz . The function
221- will convert the audio into float32 and truncate to 8 seconds (keeping the end)
222- or pad to 8 seconds.
222+ audio_array: Numpy array containing audio samples at sample_rate . The function
223+ will convert the audio into float32 and truncate to WINDOW_SECONDS seconds (keeping the end)
224+ or pad to WINDOW_SECONDS seconds.
223225 language: Language of the audio.
224226 sample_rate: Sample rate of the audio.
225227 sample_width: Sample width of the audio.
@@ -265,7 +267,7 @@ async def predict(
265267
266268 @staticmethod
267269 def truncate_audio_to_last_n_seconds (
268- audio_array : np .ndarray , n_seconds : float = 8.0 , sample_rate : int = DEFAULT_SAMPLE_RATE
270+ audio_array : np .ndarray , n_seconds : float = WINDOW_SECONDS , sample_rate : int = DEFAULT_SAMPLE_RATE
269271 ) -> np .ndarray :
270272 """Truncate audio to last n seconds or pad with zeros to meet n seconds.
271273
@@ -303,7 +305,7 @@ def download_model() -> None:
303305 If not, it will download the model from HuggingFace.
304306 """
305307
306- # Check if model file exists
308+ # Check if model file already exists
307309 if SmartTurnDetector .model_exists ():
308310 return
309311
0 commit comments