-
Notifications
You must be signed in to change notification settings - Fork 43
Add post-processing to meeting mode and dictation context #292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
b72d91b
f5c14a6
ba66441
1fdd105
4637653
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,7 +20,7 @@ use pidlock::Pidlock; | |
| use std::path::PathBuf; | ||
| use std::process::Stdio; | ||
| use std::sync::Arc; | ||
| use std::time::Duration; | ||
| use std::time::{Duration, Instant}; | ||
| use tokio::process::Command; | ||
| use tokio::signal::unix::{signal, SignalKind}; | ||
|
|
||
|
|
@@ -475,6 +475,8 @@ pub struct Daemon { | |
| audio_feedback: Option<AudioFeedback>, | ||
| text_processor: TextProcessor, | ||
| post_processor: Option<PostProcessor>, | ||
| /// Last post-processed text and when it was produced, for context in subsequent dictations | ||
| last_dictation: Option<(String, Instant)>, | ||
| // Model manager for multi-model support | ||
| model_manager: Option<ModelManager>, | ||
| // Background task for loading model on-demand | ||
|
|
@@ -588,6 +590,7 @@ impl Daemon { | |
| audio_feedback, | ||
| text_processor, | ||
| post_processor, | ||
| last_dictation: None, | ||
| model_manager: None, | ||
| model_load_task: None, | ||
| transcription_task: None, | ||
|
|
@@ -1231,7 +1234,7 @@ impl Daemon { | |
|
|
||
| /// Handle transcription completion (called when transcription_task completes) | ||
| async fn handle_transcription_result( | ||
| &self, | ||
| &mut self, | ||
| state: &mut State, | ||
| result: std::result::Result<TranscriptionResult, tokio::task::JoinError>, | ||
| ) { | ||
|
|
@@ -1277,6 +1280,14 @@ impl Daemon { | |
| } | ||
| } | ||
|
|
||
| // Get context from last dictation if within 60 seconds | ||
| let recent_context = self.last_dictation.as_ref().and_then(|(text, when)| { | ||
| if when.elapsed() < Duration::from_secs(60) { | ||
| Some(text.as_str()) | ||
| } else { | ||
| None | ||
| } | ||
| }); | ||
| // Apply post-processing command (profile overrides default) | ||
| let final_text = if let Some(profile) = active_profile { | ||
| if let Some(ref cmd) = profile.post_process_command { | ||
|
|
@@ -1287,32 +1298,43 @@ impl Daemon { | |
| }; | ||
| let profile_processor = PostProcessor::new(&profile_config); | ||
| tracing::info!( | ||
| "Post-processing with profile: {:?}", | ||
| profile_override.as_ref().unwrap() | ||
| "Post-processing with profile: {:?}, context: {:?}", | ||
| profile_override.as_ref().unwrap(), | ||
| recent_context | ||
| ); | ||
|
Comment on lines
1299
to
1304
|
||
| let result = profile_processor.process(&processed_text).await; | ||
| tracing::info!("Post-processed: {:?}", result); | ||
| let result = profile_processor | ||
| .process_with_context(&processed_text, recent_context) | ||
| .await; | ||
| tracing::info!("Post-processed: {:?}, changed: {}", result, result != processed_text); | ||
| result | ||
| } else { | ||
| // Profile exists but has no post_process_command, use default | ||
| if let Some(ref post_processor) = self.post_processor { | ||
| tracing::info!("Post-processing: {:?}", processed_text); | ||
| let result = post_processor.process(&processed_text).await; | ||
| tracing::info!("Post-processed: {:?}", result); | ||
| tracing::info!("Post-processing: {:?}, context: {:?}", processed_text, recent_context); | ||
| let result = post_processor | ||
| .process_with_context(&processed_text, recent_context) | ||
| .await; | ||
| tracing::info!("Post-processed: {:?}, changed: {}", result, result != processed_text); | ||
| result | ||
| } else { | ||
| processed_text | ||
| } | ||
| } | ||
| } else if let Some(ref post_processor) = self.post_processor { | ||
| tracing::info!("Post-processing: {:?}", processed_text); | ||
| let result = post_processor.process(&processed_text).await; | ||
| tracing::info!("Post-processed: {:?}", result); | ||
| tracing::info!("Post-processing: {:?}, context: {:?}", processed_text, recent_context); | ||
| let result = post_processor | ||
| .process_with_context(&processed_text, recent_context) | ||
| .await; | ||
| tracing::info!("Post-processed: {:?}, changed: {}", result, result != processed_text); | ||
| result | ||
| } else { | ||
| processed_text | ||
| }; | ||
|
|
||
| // Track last dictation for context in subsequent post-processing | ||
| self.last_dictation = | ||
| Some((final_text.clone(), Instant::now())); | ||
|
|
||
| if smart_submit { | ||
| tracing::debug!( | ||
| "Smart auto-submit: final text after post-processing: {:?}", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,7 +40,9 @@ pub use state::{ChunkState, MeetingState}; | |
| pub use storage::{MeetingStorage, StorageConfig, StorageError}; | ||
|
|
||
| use crate::error::{MeetingError, Result}; | ||
| use crate::output::post_process::PostProcessor; | ||
| use crate::transcribe::{self, Transcriber}; | ||
| use std::collections::HashMap; | ||
| use std::sync::Arc; | ||
| use tokio::sync::mpsc; | ||
|
|
||
|
|
@@ -100,6 +102,10 @@ pub struct MeetingDaemon { | |
| transcriber: Option<Arc<dyn Transcriber>>, | ||
| engine_name: String, | ||
| event_tx: mpsc::Sender<MeetingEvent>, | ||
| post_processor: Option<PostProcessor>, | ||
| /// Previous chunk's post-processed text, tracked per audio source | ||
| /// so mic and loopback contexts don't bleed into each other | ||
| last_chunk_text: HashMap<AudioSource, String>, | ||
| } | ||
|
|
||
| impl MeetingDaemon { | ||
|
|
@@ -116,6 +122,15 @@ impl MeetingDaemon { | |
| Arc::from(transcribe::create_transcriber(app_config)?); | ||
| let engine_name = format!("{:?}", app_config.engine).to_lowercase(); | ||
|
|
||
| let post_processor = app_config.output.post_process.as_ref().map(|cfg| { | ||
| tracing::info!( | ||
| "Meeting post-processing enabled: command={:?}, timeout={}ms", | ||
| cfg.command, | ||
| cfg.timeout_ms | ||
| ); | ||
| PostProcessor::new(cfg) | ||
| }); | ||
|
|
||
| Ok(Self { | ||
| config, | ||
| state: MeetingState::Idle, | ||
|
|
@@ -124,6 +139,8 @@ impl MeetingDaemon { | |
| transcriber: Some(transcriber), | ||
| engine_name, | ||
| event_tx, | ||
| post_processor, | ||
| last_chunk_text: HashMap::new(), | ||
| }) | ||
| } | ||
|
|
||
|
|
@@ -190,6 +207,7 @@ impl MeetingDaemon { | |
| } | ||
|
|
||
| self.state = std::mem::take(&mut self.state).stop(); | ||
| self.last_chunk_text.clear(); | ||
|
|
||
| // Finalize meeting | ||
| if let Some(ref mut meeting) = self.current_meeting { | ||
|
|
@@ -280,10 +298,27 @@ impl MeetingDaemon { | |
| let mut buffer = processor.new_buffer(chunk_id, source, start_offset_ms); | ||
| buffer.add_samples(&samples); | ||
|
|
||
| let result = processor | ||
| let mut result = processor | ||
| .process_chunk(buffer) | ||
| .map_err(crate::error::VoxtypeError::Transcribe)?; | ||
|
|
||
| // Post-process segment text if configured | ||
| if let Some(ref post_processor) = self.post_processor { | ||
| let context = self.last_chunk_text.get(&source).map(|s| s.as_str()); | ||
| for segment in &mut result.segments { | ||
| if !segment.text.is_empty() { | ||
| segment.text = post_processor | ||
| .process_with_context(&segment.text, context) | ||
| .await; | ||
|
||
| } | ||
| } | ||
| // Update context for next chunk (per source) | ||
| if let Some(last_seg) = result.segments.last() { | ||
| self.last_chunk_text | ||
| .insert(source, last_seg.text.clone()); | ||
| } | ||
| } | ||
|
|
||
| // Add segments to transcript | ||
| if let Some(ref mut meeting) = self.current_meeting { | ||
| for segment in &result.segments { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,12 +36,14 @@ impl PostProcessor { | |
| } | ||
| } | ||
|
|
||
| /// Process text through the external command | ||
| /// Process text with optional context from a previous chunk | ||
| /// | ||
| /// When context is provided, it is passed via the VOXTYPE_CONTEXT environment | ||
| /// variable so the post-processing command can use it for continuity. | ||
| /// Stdin always contains only the current text, keeping existing scripts compatible. | ||
| /// Returns the processed text on success, or the original text on any failure. | ||
| /// This ensures voice-to-text always produces output even when post-processing fails. | ||
| pub async fn process(&self, text: &str) -> String { | ||
| match self.execute_command(text).await { | ||
| pub async fn process_with_context(&self, text: &str, context: Option<&str>) -> String { | ||
| match self.execute_command_with_env(text, context).await { | ||
| Ok(processed) => { | ||
| if processed.is_empty() { | ||
| tracing::warn!( | ||
|
|
@@ -64,13 +66,32 @@ impl PostProcessor { | |
| } | ||
| } | ||
|
|
||
| async fn execute_command(&self, text: &str) -> Result<String, PostProcessError> { | ||
| // Spawn command via shell for proper parsing of complex commands | ||
| let mut child = Command::new("sh") | ||
| .args(["-c", &self.command]) | ||
| /// Process text through the external command | ||
| /// | ||
| /// Returns the processed text on success, or the original text on any failure. | ||
| /// This ensures voice-to-text always produces output even when post-processing fails. | ||
| pub async fn process(&self, text: &str) -> String { | ||
| self.process_with_context(text, None).await | ||
| } | ||
|
|
||
| async fn execute_command_with_env( | ||
| &self, | ||
| text: &str, | ||
| context: Option<&str>, | ||
| ) -> Result<String, PostProcessError> { | ||
| let mut cmd = Command::new("sh"); | ||
| cmd.args(["-c", &self.command]) | ||
| .stdin(Stdio::piped()) | ||
| .stdout(Stdio::piped()) | ||
| .stderr(Stdio::piped()) | ||
| .stderr(Stdio::piped()); | ||
|
|
||
| // Always clear to prevent inheriting stale context from parent environment | ||
| cmd.env_remove("VOXTYPE_CONTEXT"); | ||
| if let Some(ctx) = context { | ||
| cmd.env("VOXTYPE_CONTEXT", ctx); | ||
| } | ||
|
Comment on lines
+88
to
+92
|
||
|
|
||
| let mut child = cmd | ||
| .spawn() | ||
| .map_err(|e| PostProcessError::SpawnFailed(e.to_string()))?; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`recent_context` is a `&str` borrowed from `self.last_dictation` and is passed into async post-processing calls (`.await`). The function then updates `self.last_dictation` afterwards, which is likely to cause a borrow-checker error (a borrow of `self` held across an await, then mutated). To avoid this, materialize the context into a local owned value (e.g., clone the string into an `Option<String>` and use `.as_deref()` when calling `process_with_context`). There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in 4637653. Cloned the context string into an owned
`Option<String>` and used `.as_deref()` at all three call sites. Same reasoning as the `meeting/mod.rs` fix: the original compiled fine under NLL, but the owned value is more robust.