22
33import { chunkText , cleanTextForTTS } from '../utils/text-cleaner.js' ;
44
5- // Text splitting stream to break text into chunks
5+ // Text splitting stream to break text into chunks (enhanced for streaming)
66export class TextSplitterStream {
77 constructor ( ) {
88 this . chunks = [ ] ;
9+ this . pendingText = '' ;
910 this . closed = false ;
1011 }
1112
@@ -15,19 +16,58 @@ export class TextSplitterStream {
1516 return chunkText ( cleanedText ) ;
1617 }
1718
18- push ( text ) {
19- // Simple sentence splitting for now
20- const sentences = this . chunkText ( text ) || [ text ] ;
21- this . chunks . push ( ...sentences ) ;
19+ push ( ...texts ) {
20+ // Support both single text and multiple texts like the official implementation
21+ for ( const text of texts ) {
22+ this . pendingText += text ;
23+
24+ // Check if we have complete sentences to process
25+ const sentences = this . pendingText . split ( / (?< = [ . ! ? ] ) \s + / ) ;
26+
27+ // Keep the last fragment in case it's incomplete
28+ if ( sentences . length > 1 ) {
29+ this . pendingText = sentences . pop ( ) ;
30+
31+ // Process complete sentences
32+ for ( const sentence of sentences ) {
33+ if ( sentence . trim ( ) ) {
34+ const chunks = this . chunkText ( sentence ) || [ sentence ] ;
35+ this . chunks . push ( ...chunks ) ;
36+ }
37+ }
38+ }
39+ }
40+ }
41+
42+ flush ( ) {
43+ // Process any remaining text without waiting for sentence completion
44+ if ( this . pendingText . trim ( ) ) {
45+ const chunks = this . chunkText ( this . pendingText ) || [ this . pendingText ] ;
46+ this . chunks . push ( ...chunks ) ;
47+ this . pendingText = '' ;
48+ }
2249 }
2350
2451 close ( ) {
52+ // Flush any remaining text and close the stream
53+ this . flush ( ) ;
2554 this . closed = true ;
2655 }
2756
2857 async * [ Symbol . asyncIterator ] ( ) {
29- for ( const chunk of this . chunks ) {
30- yield chunk ;
58+ let processedIndex = 0 ;
59+
60+ while ( ! this . closed || processedIndex < this . chunks . length ) {
61+ // Yield any new chunks that have been added
62+ while ( processedIndex < this . chunks . length ) {
63+ yield this . chunks [ processedIndex ] ;
64+ processedIndex ++ ;
65+ }
66+
67+ // If not closed, wait a bit for more chunks
68+ if ( ! this . closed ) {
69+ await new Promise ( resolve => setTimeout ( resolve , 10 ) ) ;
70+ }
3171 }
3272 }
3373}
@@ -287,6 +327,10 @@ export class KokoroTTS {
287327 if ( this . session && this . voiceEmbeddings [ voice ] ) {
288328 try {
289329 const language = voice . startsWith ( 'a' ) ? 'a' : 'b' ; // Determine language from voice ID
330+
331+ // Get phonemes for the text chunk
332+ const phonemes = await this . phonemize ( text , language ) ;
333+
290334 const tokenIds = await this . tokenizeText ( text , language ) ;
291335 const inputIds = new BigInt64Array ( tokenIds . map ( id => BigInt ( id ) ) ) ;
292336
@@ -361,8 +405,10 @@ export class KokoroTTS {
361405 }
362406 }
363407
408+ // Yield with phonemes information like the official implementation
364409 yield {
365410 text,
411+ phonemes, // Include phonemes in the output
366412 audio : new RawAudio ( finalAudioData , sampleRate )
367413 } ;
368414 } catch ( modelError ) {
@@ -375,6 +421,7 @@ export class KokoroTTS {
375421 // Yield silence in case of error
376422 yield {
377423 text,
424+ phonemes : text , // Fallback phonemes
378425 audio : new RawAudio ( new Float32Array ( 24000 ) , 24000 )
379426 } ;
380427 }
0 commit comments