Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.0.4] - 2025-05-19

### Added

- Support end-of-utterance messages (DEL-24982)

## [3.0.3] - 2025-03-03

### Added
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.0.3
3.0.4
14 changes: 14 additions & 0 deletions speechmatics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,11 @@ def get_transcription_config(
]:
config[option] = True if args.get(option) else config.get(option)

if args.get("end_of_utterance_silence_trigger") is not None:
config["conversation_config"] = {
"end_of_utterance_silence_trigger": args.get("end_of_utterance_silence_trigger")
}

if args.get("volume_threshold") is not None:
config["audio_filtering_config"] = {
"volume_threshold": args.get("volume_threshold")
Expand Down Expand Up @@ -547,6 +552,13 @@ def audio_event_handler(message):
sys.stdout.write(f"{escape_seq}[{event_name}]\n")
transcripts.text += f"[{event_name}] "

def end_of_utterance_handler(message):
    """Report an EndOfUtterance server message.

    When JSON output is enabled the raw message is printed as JSON and
    nothing else happens. Otherwise an ``[EndOfUtterance]`` marker line is
    written to stdout and the marker is appended to the accumulated
    transcript text.

    :param message: The EndOfUtterance message received from the server;
        it must be JSON-serialisable when JSON output is enabled.
    """
    if print_json:
        print(json.dumps(message))
        return
    sys.stdout.write("[EndOfUtterance]\n")
    # The trailing space keeps the marker separated from whatever text is
    # appended next, matching how audio event markers are accumulated.
    transcripts.text += "[EndOfUtterance] "

def partial_translation_handler(message):
if print_json:
print(json.dumps(message))
Expand Down Expand Up @@ -581,6 +593,8 @@ def end_of_transcript_handler(_):
# print transcription (if text was requested without translation)

api.add_event_handler(ServerMessageType.AudioEventStarted, audio_event_handler)
api.add_event_handler(ServerMessageType.EndOfUtterance, end_of_utterance_handler)

if print_json:
if enable_partials or enable_translation_partials:
api.add_event_handler(
Expand Down
8 changes: 8 additions & 0 deletions speechmatics/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,14 @@ def get_arg_parser():
default=None,
help=("Filter out quiet audio which falls below this threshold (0.0-100.0)"),
)
config_parser.add_argument(
"--end-of-utterance-silence-trigger",
dest="end_of_utterance_silence_trigger",
type=float,
default=None,
help=("Generate an EndOfUtterance message from the server after this many seconds of silence (0.0-2.0)"),

)
config_parser.add_argument(
"--remove-disfluencies",
default=None,
Expand Down
12 changes: 12 additions & 0 deletions speechmatics/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,13 @@ class TranslationConfig:
def asdict(self):
return asdict(self)

@dataclass
class ConversationConfig:
    """Settings that control end-of-utterance detection."""

    end_of_utterance_silence_trigger: Optional[float] = None
    """Length of silence, in seconds, needed to trigger end of utterance
    detection. Defaults to ``None``."""

@dataclass
class RTTranslationConfig(TranslationConfig):
"""Real-time mode: Translation config."""
Expand Down Expand Up @@ -268,6 +274,9 @@ class TranscriptionConfig(_TranscriptionConfig):
"""Indicates if partial translation, where words are produced
immediately, is enabled."""

conversation_config: Optional[ConversationConfig] = None
"""Optional configuration for end-of-utterance detection."""

translation_config: Optional[TranslationConfig] = None
"""Optional configuration for translation."""

Expand Down Expand Up @@ -531,6 +540,9 @@ class ServerMessageType(str, Enum):
AddTranscript = "AddTranscript"
"""Indicates the final transcript of a part of the audio."""

EndOfUtterance = "EndOfUtterance"
"""Indicates that an utterance has ended, based on silence"""

AudioEventStarted = "AudioEventStarted"
"""Indicates the start of an audio event."""

Expand Down
Loading