franchesoni · parker-brown-family · May 25, 2026 · Jun 9, 2026
diff --git a/bootstrap.sh b/bootstrap.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# Bootstrap: ensure faster-whisper is installed and the tiny model is pre-cached.
+set -euo pipefail
+
+VENV="$HOME/env_sandbox"
+PYTHON="$VENV/bin/python3"
+
+# Fail early with a clear message; otherwise a missing virtualenv shows up as
+# "Installing faster-whisper..." followed by a "No such file" error.
+if [[ ! -x "$PYTHON" ]]; then
+    echo "[s2t] No Python interpreter at $PYTHON; create the virtualenv first." >&2
+    exit 1
+fi
+
+echo "[s2t] Checking faster-whisper..."
+# "python -m pip" ties the check and the install to the interpreter that
+# imports faster_whisper below, independent of the bin/pip wrapper script.
+if "$PYTHON" -m pip show faster-whisper > /dev/null 2>&1; then
+    echo "[s2t] faster-whisper already installed."
+else
+    echo "[s2t] Installing faster-whisper..."
+    "$PYTHON" -m pip install faster-whisper
+fi
+
+echo "[s2t] Pre-warming Whisper tiny model cache..."
+# Instantiating the model is what fills the cache. The quoted 'PY' delimiter
+# keeps the shell from expanding anything inside the Python snippet.
+"$PYTHON" - <<'PY'
+from faster_whisper import WhisperModel
+WhisperModel("tiny", device="cpu", compute_type="int8")
+print("[s2t] Model cached and ready.")
+PY
diff --git a/daemon.py b/daemon.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+"""S2T warm daemon — holds Whisper tiny model in RAM, serves transcription over HTTP on 127.0.0.1:7979.
+
+Endpoints:
+    GET  /health      -> 200 {"status": "ok"}
+    POST /transcribe  -> JSON body {"path": "<audio file>"}; replies 200 {"text": ...}
+                         or {"error": ...} with a 400/500 status.
+"""
+
+import json
+import sys
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from pathlib import Path
+
+from faster_whisper import WhisperModel
+
+PORT = 7979
+model = None  # Set by load_model() before the server starts accepting requests.
+
+
+def load_model():
+    """Load the Whisper tiny model (CPU, int8) into the module-level ``model``."""
+    global model
+    print("Loading Whisper tiny model...", flush=True)
+    model = WhisperModel("tiny", device="cpu", compute_type="int8")
+    print(f"Model ready. Listening on 127.0.0.1:{PORT}", flush=True)
+
+
+class Handler(BaseHTTPRequestHandler):
+    """JSON request handler for the /health and /transcribe endpoints."""
+
+    def log_message(self, format, *args):
+        """Suppress the base class's per-request logging."""
+        pass
+
+    def do_GET(self):
+        """Serve the /health liveness probe; any other path is a 404."""
+        if self.path == "/health":
+            self._respond(200, {"status": "ok"})
+        else:
+            self._respond(404, {"error": "not found"})
+
+    def do_POST(self):
+        """Transcribe the audio file named by the "path" field of the JSON body."""
+        if self.path != "/transcribe":
+            self._respond(404, {"error": "not found"})
+            return
+
+        # Answer malformed requests with a JSON 400; an unhandled exception
+        # here would close the connection without sending any response.
+        try:
+            length = max(int(self.headers.get("Content-Length", 0)), 0)
+            body = json.loads(self.rfile.read(length))
+        except ValueError:
+            self._respond(400, {"error": "invalid JSON body"})
+            return
+
+        audio_path = body.get("path", "") if isinstance(body, dict) else ""
+        # Path("") is the current directory and therefore exists(), so an
+        # empty path has to be rejected explicitly; is_file() additionally
+        # rejects directories.
+        if (
+            not isinstance(audio_path, str)
+            or not audio_path
+            or not Path(audio_path).is_file()
+        ):
+            self._respond(400, {"error": f"file not found: {audio_path}"})
+            return
+
+        try:
+            segments, _ = model.transcribe(
+                audio_path, language="en", condition_on_previous_text=False
+            )
+            # Joining stays inside the try: consuming ``segments`` can raise too.
+            text = " ".join(seg.text.strip() for seg in segments)
+        except Exception as exc:
+            self._respond(500, {"error": f"transcription failed: {exc}"})
+            return
+        self._respond(200, {"text": text})
+
+    def _respond(self, code, data):
+        """Send ``data`` JSON-encoded with HTTP status ``code``."""
+        body = json.dumps(data).encode()
+        self.send_response(code)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+
+if __name__ == "__main__":
+    load_model()
+    server = HTTPServer(("127.0.0.1", PORT), Handler)
+    try:
+        server.serve_forever()
+    except KeyboardInterrupt:
+        print("Daemon stopped.")
+    finally:
+        server.server_close()
diff --git a/expand_phrases.py b/expand_phrases.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""
+Reads transcribed text, applies phrase expansions from phrases.json,
+writes result back to the same file.
+"""
+import sys
+import json
+import re
+from pathlib import Path
+
+
+def _phrase_pattern(phrase):
+    """Compile a case-insensitive, whole-phrase matcher for ``phrase``.
+
+    An edge of the phrase that is a word character gets a lookaround guard so
+    short entries such as "the" or "you" cannot match inside longer words
+    ("other", "your"). Edges that are punctuation or symbols ("...", "♪")
+    get no guard, so those entries still match wherever they occur.
+    """
+    head = r"(?<!\w)" if re.match(r"\w", phrase) else ""
+    tail = r"(?!\w)" if re.search(r"\w\Z", phrase) else ""
+    return re.compile(head + re.escape(phrase) + tail, re.IGNORECASE)
+
+
+def expand(text: str, phrases: dict) -> str:
+    """Return ``text`` with each phrase in ``phrases`` replaced by its expansion.
+
+    Keys starting with "_" (comments/section markers), empty keys and
+    non-string values are skipped. The result is stripped of surrounding
+    whitespace.
+    """
+    # Sort longest phrases first so more specific matches win over partial ones
+    for phrase, expansion in sorted(phrases.items(), key=lambda x: -len(x[0])):
+        if not phrase or phrase.startswith("_") or not isinstance(expansion, str):
+            continue
+        # A callable replacement inserts the expansion verbatim; a plain string
+        # would have its backslashes and group references interpreted by re.
+        text = _phrase_pattern(phrase).sub(lambda _m, _exp=expansion: _exp, text)
+    return text.strip()
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: expand_phrases.py <transcription_file>", file=sys.stderr)
+        sys.exit(1)
+
+    transcript_path = Path(sys.argv[1])
+    phrases_path = Path(__file__).parent / "phrases.json"
+
+    # Without a phrase table there is nothing to expand; leave the file as is.
+    if not phrases_path.exists():
+        sys.exit(0)
+
+    # phrases.json holds non-ASCII keys, so do not rely on the locale encoding.
+    with open(phrases_path, encoding="utf-8") as f:
+        phrases = json.load(f)
+
+    text = transcript_path.read_text().strip()
+    expanded = expand(text, phrases)
+
+    transcript_path.write_text(expanded + "\n")
+    print(f"[phrases] '{text}' -> '{expanded}'")
diff --git a/phrases.json b/phrases.json
@@ -0,0 +1,37 @@
+{
+  "_comment": "Map spoken phrases to expansions. Case-insensitive. Variants = common mishearings.",
+
+  "_section_commands": "--- ALE / project commands ---",
+  "spin up intellimass": "spin up intellimass",
+  "spin up intel amass": "spin up intellimass",
+  "spin up intellimess": "spin up intellimass",
+  "spin up intel mess": "spin up intellimass",
+  "spin up intel mass": "spin up intellimass",
+  "spin up ale": ".spin up ale",
+  "spin down": "spin down",
+  "Reload whisper phrases": "cd ~/s2t && python3 -c 'import json; d=json.load(open(\"phrases.json\")); print(len([k for k in d if not k.startswith(\"_\")]), \"phrases loaded\")' && cd -",
+  "It's been up in Hellimaz.": "spin up intellimass",
+  "This beautiful mass.": "spin up intellimass",
+  "Spin up and tell em ass": "spin up intellimass",
+  "Spin up and tell a mess.": "spin up intellimass",
+  "Spin up until I miss": "spin up intellimass",
+  "Spin up HC video.": "spin up hcvideo",
+  "Spin up HC video": "spin up hcvideo",
+  "Spin up headwaters.": "spin up head-waters",
+  " It's been up headwaters.": "spin up head-waters",
+  "Spin up headwaters": "spin up head-waters",
+
+
+  "_section_prefixes": "--- Common prompt prefixes ---",
+  "ok robot": "ok robot",
+  "direct": "--direct",
+
+  "_section_noise": "--- Suppress known hallucination garbage (set to empty string) ---",
+  "thanks for watching": "",
+  "thank you for watching": "",
+  "thank you.": "",
+  "you": "",
+  "the": "",
+  "♪": "",
+  "...": ""
+}
diff --git a/stop_and_process_recording.sh b/stop_and_process_recording.sh
@@ -1,30 +1,112 @@
 #!/bin/bash
 
+#######################################
+# Paste the clipboard into the focused window via a simulated key chord.
+# Globals:
+#   S2T_PASTE_MODE (read) - "auto" (default), "terminal" or "default"
+#   S2T_PASTE_LOG  (read) - file that auto-detection results are appended to
+#                           (default: /tmp/s2t-paste.log)
+# In "auto" mode the active window's class, title and owning process command
+# line are matched against a list of terminal/agent names: a match, or no
+# window information at all, selects "terminal"; anything else "default".
+# "terminal" sends ctrl+shift+v and "default" sends ctrl+v. Any other mode
+# raises a notification and falls back to ctrl+v.
+#######################################
+paste_transcription() {
+    local paste_mode="${S2T_PASTE_MODE:-auto}"
+    local log_file="${S2T_PASTE_LOG:-/tmp/s2t-paste.log}"
+    local window_id="" window_class="" window_name=""
+    local window_pid="" window_command="" window_signature=""
+
+    if [[ "$paste_mode" == "auto" ]]; then
+        # Each probe is best-effort: a failed lookup leaves its field empty.
+        window_id=$(xdotool getactivewindow 2>/dev/null || true)
+        if [[ -n "$window_id" ]]; then
+            window_class=$(xdotool getwindowclassname "$window_id" 2>/dev/null || true)
+            window_name=$(xdotool getwindowname "$window_id" 2>/dev/null || true)
+            window_pid=$(xdotool getwindowpid "$window_id" 2>/dev/null || true)
+            if [[ -n "$window_pid" ]]; then
+                window_command=$(ps -p "$window_pid" -o comm= -o args= 2>/dev/null | head -n 1 || true)
+            fi
+        fi
+
+        window_signature=$(printf '%s\n%s\n%s\n' "$window_class" "$window_name" "$window_command")
+
+        if printf '%s\n' "$window_signature" \
+            | grep -Eiq 'terminal|console|xterm|rxvt|urxvt|konsole|kitty|alacritty|wezterm|ghostty|foot|tilix|terminator|qterminal|lxterminal|mate-terminal|xfce4-terminal|gnome-terminal|ptyxis|st-256color|blackbox|warp|tabby|rio|contour|codex|claude|opencode|aider|agent|gpt-'; then
+            paste_mode="terminal"
+        elif [[ -z "${window_signature//[[:space:]]/}" ]]; then
+            # Nothing could be detected: use the terminal chord.
+            paste_mode="terminal"
+        else
+            paste_mode="default"
+        fi
+
+        # Record the decision for troubleshooting; logging errors are ignored.
+        printf '%s paste_mode=%s window_id=%s class=%q name=%q pid=%s command=%q\n' \
+            "$(date -Is)" "$paste_mode" "$window_id" "$window_class" "$window_name" "$window_pid" "$window_command" \
+            >> "$log_file" 2>/dev/null || true
+    fi
+
+    case "$paste_mode" in
+        terminal)
+            xdotool key --clearmodifiers ctrl+shift+v
+            ;;
+        default)
+            xdotool key --clearmodifiers ctrl+v
+            ;;
+        *)
+            notify-send "S2T paste mode error" "Unknown S2T_PASTE_MODE: $paste_mode"
+            xdotool key --clearmodifiers ctrl+v
+            ;;
+    esac
+}
+
 # Stop recording
-kill $(cat $HOME/s2t/tmp/recording_pid)
+RECORDING_PID=$(cat "$HOME/s2t/tmp/recording_pid")
+kill "$RECORDING_PID"
+# Give the recorder up to ~1s to exit so the WAV is complete before it is
+# read; the warm-daemon path below opens the file almost immediately.
+for _ in {1..20}; do
+    kill -0 "$RECORDING_PID" 2>/dev/null || break
+    sleep 0.05
+done
 
-# Transcribe audio
-source $HOME/env_sandbox/bin/activate
-whisper $HOME/s2t/tmp/recording.wav --model tiny --output_dir="${HOME}/s2t/tmp/" --output_format="txt"
-deactivate
+AUDIO_FILE="$HOME/s2t/tmp/recording.wav"
+TEXT_FILE="$HOME/s2t/tmp/recording.txt"
+
+# Escape backslashes and double quotes so the path embeds as a valid JSON string.
+AUDIO_JSON=${AUDIO_FILE//\\/\\\\}
+AUDIO_JSON=${AUDIO_JSON//\"/\\\"}
+
+# Try warm daemon first (sub-second). Fall back to direct faster-whisper+tiny (~2s)
+# when the daemon is down or its transcription request fails, so that a daemon
+# error does not end in an empty paste.
+if curl -sf --max-time 0.5 http://127.0.0.1:7979/health > /dev/null 2>&1 \
+    && TEXT=$(curl -sf --max-time 15 -X POST http://127.0.0.1:7979/transcribe \
+        -H "Content-Type: application/json" \
+        -d "{\"path\":\"$AUDIO_JSON\"}" \
+        | python3 -c "import sys,json; print(json.load(sys.stdin)['text'])" 2>/dev/null); then
+    printf '%s\n' "$TEXT" > "$TEXT_FILE"
+else
+    "$HOME/env_sandbox/bin/python3" "$HOME/s2t/transcribe.py" "$AUDIO_FILE" "$TEXT_FILE"
+fi
 
-# Temporary file for transcription
-TRANSCRIPTION_FILE="$HOME/s2t/tmp/recording.txt"
+# Apply phrase expansions (phrases.json)
+"$HOME/env_sandbox/bin/python3" "$HOME/s2t/expand_phrases.py" "$TEXT_FILE"
 
 # Copy transcription to clipboard
-xclip -selection clipboard < $TRANSCRIPTION_FILE
+xclip -selection clipboard < "$TEXT_FILE"
 
-# Optional: Notify the user that transcription is complete
-notify-send "Transcription Complete" "Your speech has been transcribed and is now in the clipboard."
+# Notify. "--" ends option parsing: expansions such as "--direct" can make the
+# transcript start with dashes, which notify-send would otherwise reject.
+notify-send -- "Transcription Complete" "$(cat "$TEXT_FILE")"
 
 # Ensure the clipboard has time to update
 sleep 0.1
 
 # Simulate the paste action
-xdotool key ctrl+v  # Use whichever key combination is appropriate
+paste_transcription
 
 # Clean up
-rm -rf $HOME/s2t/tmp/
+rm -rf -- "${HOME:?}/s2t/tmp/"
-
-
-
diff --git a/tests/test_s2t.sh b/tests/test_s2t.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+# S2T test suite — run from any directory.
+# Tests: direct transcription, phrase expansion, daemon health, daemon
+# transcription, fallback path timing, spin project registration.
+#
+# Environment:
+#   S2T_SPIN_BIN  spin executable used by test 6 (defaults to the path below)
+#
+# Commands under test are run inside `if` or with `|| ...` so that a failure is
+# counted as [FAIL] instead of tripping `set -e` and aborting the whole suite.
+set -euo pipefail
+
+S2T="$HOME/s2t"
+VENV="$HOME/env_sandbox"
+DAEMON_URL="http://127.0.0.1:7979"
+SPIN_BIN="${S2T_SPIN_BIN:-/home/pbrown/BROWN-FAMILY-SPORTS/Software/spin/bin/spin}"
+TMP=$(mktemp -d)
+PASS=0
+FAIL=0
+
+cleanup() { rm -rf "$TMP"; }
+trap cleanup EXIT
+
+# Print a coloured verdict line and bump the matching counter.
+green() { echo -e "\033[0;32m[PASS]\033[0m $*"; PASS=$((PASS + 1)); }
+red()   { echo -e "\033[0;31m[FAIL]\033[0m $*"; FAIL=$((FAIL + 1)); }
+
+# Generate a 1-second 1 kHz sine-tone WAV at path $1 for testing
+make_wav() {
+    ffmpeg -f lavfi -i "sine=frequency=1000:duration=1" "$1" -y -loglevel quiet
+}
+
+echo "=== S2T Test Suite ==="
+echo ""
+
+# --- Test 1: direct transcription via transcribe.py ---
+echo "[1] Direct transcription (transcribe.py)..."
+make_wav "$TMP/t1.wav"
+if "$VENV/bin/python3" "$S2T/transcribe.py" "$TMP/t1.wav" "$TMP/t1.txt" 2>/dev/null \
+    && [[ -f "$TMP/t1.txt" ]]; then
+    green "transcribe.py produced output file"
+else
+    red "transcribe.py did not produce output file"
+fi
+
+# --- Test 2: phrase expansion ---
+echo "[2] Phrase expansion..."
+echo "test phrase" > "$TMP/expand.txt"
+# The input file is created just above, so its existence proves nothing; the
+# exit status of expand_phrases.py is the actual signal.
+if "$VENV/bin/python3" "$S2T/expand_phrases.py" "$TMP/expand.txt" 2>/dev/null \
+    && [[ -f "$TMP/expand.txt" ]]; then
+    green "expand_phrases.py ran without error"
+else
+    red "expand_phrases.py failed"
+fi
+
+# --- Test 3: daemon health check ---
+echo "[3] Daemon health check..."
+if curl -sf --max-time 1 "$DAEMON_URL/health" > /dev/null 2>&1; then
+    green "Daemon is running and healthy"
+    DAEMON_UP=true
+else
+    echo "  [info] Daemon not running — skipping daemon transcription test"
+    echo "  (Run 'spin up voice' to start the daemon)"
+    DAEMON_UP=false
+fi
+
+# --- Test 4: daemon transcription (only if daemon is up) ---
+if [[ "$DAEMON_UP" == "true" ]]; then
+    echo "[4] Daemon transcription..."
+    make_wav "$TMP/t4.wav"
+    # A failed request leaves RESPONSE empty and is reported by the check below.
+    RESPONSE=$(curl -sf --max-time 15 -X POST "$DAEMON_URL/transcribe" \
+        -H "Content-Type: application/json" \
+        -d "{\"path\":\"$TMP/t4.wav\"}" 2>/dev/null) || true
+    if echo "$RESPONSE" | python3 -c "import sys,json; d=json.load(sys.stdin); assert 'text' in d" 2>/dev/null; then
+        green "Daemon returned transcription response"
+    else
+        red "Daemon transcription response malformed: $RESPONSE"
+    fi
+fi
+
+# --- Test 5: fallback path timing (daemon down simulation) ---
+echo "[5] Fallback transcription timing..."
+make_wav "$TMP/t5.wav"
+FALLBACK_RC=0
+START=$(date +%s%N)
+"$VENV/bin/python3" "$S2T/transcribe.py" "$TMP/t5.wav" "$TMP/t5.txt" 2>/dev/null || FALLBACK_RC=$?
+END=$(date +%s%N)
+ELAPSED=$(( (END - START) / 1000000 ))
+if (( FALLBACK_RC != 0 )); then
+    red "Fallback transcription failed (exit status $FALLBACK_RC)"
+elif (( ELAPSED < 10000 )); then
+    green "Fallback completed in ${ELAPSED}ms (under 10s)"
+else
+    red "Fallback took ${ELAPSED}ms — suspiciously slow"
+fi
+
+# --- Test 6: spin project registered ---
+echo "[6] Spin project registered..."
+# Capture the output before matching: with pipefail, piping straight into
+# `grep -q` can fail spuriously when grep exits early and the writer gets SIGPIPE.
+if SPIN_OUT=$("$SPIN_BIN" status voice 2>&1 | sed 's/\x1b\[[0-9;]*m//g') \
+    && grep -q "voice" <<< "$SPIN_OUT"; then
+    green "spin recognizes 'voice' project"
+else
+    red "spin does not recognize 'voice' project"
+fi
+
+echo ""
+echo "=== Results: $PASS passed, $FAIL failed ==="
+[[ $FAIL -eq 0 ]]
diff --git a/transcribe.py b/transcribe.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+"""Transcribe one audio file with the Whisper tiny model and write the text to a file.
+
+Usage: transcribe.py <audio_file> <output_text_file>
+"""
+import sys
+from faster_whisper import WhisperModel
+
+
+def main(argv):
+    """Transcribe ``argv[1]`` into ``argv[2]`` and return the exit status."""
+    # Report missing arguments as a usage error rather than an IndexError traceback.
+    if len(argv) < 3:
+        print("Usage: transcribe.py <audio_file> <output_text_file>", file=sys.stderr)
+        return 1
+    audio_path = argv[1]
+    out_path = argv[2]
+
+    model = WhisperModel("tiny", device="cpu", compute_type="int8")
+    segments, _ = model.transcribe(audio_path, language="en", condition_on_previous_text=False)
+
+    text = " ".join(seg.text.strip() for seg in segments)
+    with open(out_path, "w") as f:
+        f.write(text)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))