-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdictee-test-rules
More file actions
executable file
·145 lines (125 loc) · 4.94 KB
/
Copy pathdictee-test-rules
File metadata and controls
executable file
·145 lines (125 loc) · 4.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/bin/bash
# dictee-test-rules — Test what Parakeet hears and how rules transform it
# Usage: dictee-test-rules [--loop] [--wav file.wav]
# No args: record once (Enter to stop), show raw + processed
# --loop: record repeatedly (Enter to stop each, Ctrl+C to quit)
# --wav: skip recording, transcribe an existing WAV file
set -euo pipefail
# ── Config ──────────────────────────────────────────────────────────────
# Optional per-user settings file. When present it is sourced, so it may
# define DICTEE_LANG_SOURCE, which is read immediately afterwards.
CONF="${HOME}/.config/dictee.conf"
if [[ -f "${CONF}" ]]; then
  # shellcheck disable=SC1090
  . "${CONF}"
fi

# Source language handed to dictee-postprocess; defaults to "fr".
LANG_SOURCE=${DICTEE_LANG_SOURCE:-fr}

# Scratch file that pw-record writes to and that is deleted on exit.
# NOTE(review): fixed, predictable name under /tmp — two concurrent runs
# share it. Confirm whether the transcription backend can read another
# location before moving it.
TMPWAV='/tmp/dictee-test-recording.wav'

# ── Colors ──────────────────────────────────────────────────────────────
# ANSI sequences kept as literal backslash escapes; they are only turned
# into control characters when printed (echo -e).
NC='\033[0m'
BOLD='\033[1m'
DIM='\033[2m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
# ── Helpers ─────────────────────────────────────────────────────────────
#######################################
# Exit handler: stop a recorder that is still running, then delete the
# scratch WAV file.
# Globals:   PW_PID (read, then cleared), TMPWAV (read)
# Arguments: none
# Returns:   0 — every step is best-effort.
#
# The script runs under `set -e`. Without the `|| true` guards, a failing
# kill (recorder exited between the probe and the signal) or the non-zero
# status that wait reports for a process terminated by a signal (128+N)
# would abort this handler midway and could change the script's exit
# status.
#######################################
cleanup() {
  if [ -n "${PW_PID:-}" ] && kill -0 "$PW_PID" 2>/dev/null; then
    kill "$PW_PID" 2>/dev/null || true
    # Reap the recorder so it has stopped writing before the file goes away.
    wait "$PW_PID" 2>/dev/null || true
  fi
  PW_PID=
  rm -f -- "$TMPWAV"
}
trap cleanup EXIT
#######################################
# Interactively record one utterance into $TMPWAV.
# Waits for Enter, starts pw-record in the background (16 kHz, 1 channel,
# s16), waits for Enter again, then stops and reaps the recorder.
# Globals:   TMPWAV (read), PW_PID (written; empty again on return),
#            DIM, RED, NC (read)
# Arguments: none
# Outputs:   prompts on stdout
# Returns:   0 once the recorder has been stopped; a failed `read`
#            (e.g. stdin closed) still aborts under `set -e`.
#######################################
record_audio() {
  echo -e "${DIM}Appuyez sur Entrée pour commencer l'enregistrement...${NC}"
  read -r

  pw-record --rate 16000 --channels 1 --format s16 "$TMPWAV" &
  PW_PID=$!
  # Presumably gives pw-record time to start before prompting — TODO confirm.
  sleep 0.3

  echo -e "${RED}● Parlez maintenant...${NC} ${DIM}(Entrée pour arrêter)${NC}"
  read -r

  # pw-record may already have exited on its own (failed to start, device
  # error, ...). Under `set -e` an unguarded failing kill would terminate
  # the whole script right here, before the caller gets a chance to report
  # that nothing was recorded.
  kill "$PW_PID" 2>/dev/null || true
  wait "$PW_PID" 2>/dev/null || true
  PW_PID=
}
#######################################
# Run transcribe-client on one WAV file.
# Arguments: $1 - path of the WAV file
# Outputs:   transcribe-client's stdout, unchanged; its stderr is discarded
# Returns:   exit status of transcribe-client
#######################################
transcribe() {
  local audio_path="$1"
  { transcribe-client "$audio_path"; } 2>/dev/null
}
#######################################
# Feed one transcription through dictee-postprocess.
# Globals:   LANG_SOURCE (read; passed to the tool as DICTEE_LANG_SOURCE)
# Arguments: $1 - raw transcription text
# Outputs:   dictee-postprocess's stdout; its stderr is discarded
# Returns:   status of the pipeline
#######################################
postprocess() {
  local raw="$1"
  # printf rather than echo: echo would treat text such as "-n" or "-e" as
  # one of its own options and send nothing (or altered text) to the tool.
  printf '%s\n' "$raw" \
    | DICTEE_LANG_SOURCE="$LANG_SOURCE" dictee-postprocess 2>/dev/null
}
#######################################
# Print a report for one transcription: the raw text, a Cyrillic
# diagnostic when the raw text contains Cyrillic letters, and the
# post-processed text with newlines/tabs made visible.
# Globals:   CYAN, YELLOW, RED, GREEN, BLUE, BOLD, DIM, NC (read)
# Arguments: $1 - raw transcription
#            $2 - text after post-processing
# Outputs:   the report on stdout
#
# Colour variables hold backslash escapes and are expanded with printf %b.
# Transcription text and rule suggestions are always printed with %s:
# `echo -e` would re-interpret backslashes inside them, turning the
# visible "\n" markers (and the "\n" of the suggested rule) back into real
# line breaks.
#######################################
show_result() {
  local raw="$1"
  local processed="$2"

  printf '%b\n' "${CYAN}──────────────────────────────────────${NC}"
  printf '%b%s%b\n' "${YELLOW}RAW${NC} ${BOLD}" "$raw" "${NC}"

  # If raw contains Cyrillic letters, decide based on whether processing
  # already transformed it.
  # Here-string instead of `echo | grep -q`: under `pipefail`, grep -q
  # exiting on the first match can make the writer fail with SIGPIPE and
  # turn a match into a false negative.
  if grep -qP '[А-Яа-яЁё]' <<<"$raw"; then
    if [ "$raw" != "$processed" ]; then
      # An existing rule already matched — no new rule needed.
      printf '%b\n' "${RED}CYRILLIQUE${NC} ${GREEN}✓ déjà couvert par une règle existante${NC}"
    else
      # No rule matched — propose new ones (", " and "\n" variants).
      # Trailing punctuation is dropped, then ] / $ * . ^ [ are escaped.
      # NOTE(review): if the rule engine uses a richer regex dialect,
      # + ? ( ) { } | may need escaping as well — confirm.
      local pattern
      pattern=$(printf '%s\n' "$raw" \
        | sed -e 's/[.?!,]*$//' -e 's/[]\/$*.^[]/\\&/g')
      printf '%b\n' "${RED}CYRILLIQUE${NC} ${DIM}Parakeet a détecté du cyrillique — aucune règle existante ne matche${NC}"
      printf '%b\n' "${DIM}Règles candidates (choisis selon l'intention) :${NC}"
      printf '%b%s%b\n' " ${DIM}→ pour « virgule » :${NC} ${GREEN}" \
        "[fr] /^[,.\\s]*${pattern}[,.\\s]*/, /igm" "${NC}"
      printf '%b%s%b\n' " ${DIM}→ pour « à la ligne » :${NC} ${GREEN}" \
        "[fr] /^[,.\\s]*${pattern}[,.\\s]*/\\n/igm" "${NC}"
    fi
  fi

  # Show processed with visible control chars. Done with parameter
  # expansion: a sed ':a;N;$!ba' script stops at N on single-line input
  # (GNU sed) and so never replaces tabs there.
  local newline=$'\n' tab=$'\t'
  local newline_mark='\n' tab_mark='\t'
  local display=$processed
  display=${display//"$newline"/"$newline_mark"}
  display=${display//"$tab"/"$tab_mark"}

  if [ "$raw" = "$processed" ]; then
    printf '%b\n' "${BLUE}PROCESSED${NC} ${DIM}(inchangé)${NC}"
  else
    printf '%b%s%b\n' "${GREEN}PROCESSED${NC} ${BOLD}" "$display" "${NC}"
  fi
  echo
}
# ── Main ────────────────────────────────────────────────────────────────
# Print the usage line on stderr and exit with status 1.
usage() {
  echo "Usage: $0 [--loop] [--wav file.wav]" >&2
  exit 1
}

# Report a failed pipeline stage on stderr and exit with status 1.
# Without this, `set -e` ends the script silently because the tools'
# stderr is discarded.
# Arguments: $1 - message to show
fail() {
  printf '%b%s%b\n' "${RED}" "$1" "${NC}" >&2
  exit 1
}

# Options are parsed before anything is printed so that a usage error is
# not preceded by the banner.
LOOP=false
WAV_FILE=
while [ $# -gt 0 ]; do
  case "$1" in
    --loop)
      LOOP=true
      shift
      ;;
    --wav)
      # Under `set -u`, reading a missing "$2" would abort with an
      # "unbound variable" error instead of showing the usage line.
      if [ $# -lt 2 ] || [ -z "$2" ]; then
        usage
      fi
      WAV_FILE="$2"
      shift 2
      ;;
    *)
      usage
      ;;
  esac
done

echo -e "${BOLD}dictee-test-rules${NC} — Langue: ${CYAN}$LANG_SOURCE${NC}"
echo -e "${DIM}Testez ce que Parakeet détecte et comment les règles le transforment.${NC}"
echo

if [ -n "$WAV_FILE" ]; then
  # Single file mode: no recording, transcribe the given WAV once.
  if [ ! -r "$WAV_FILE" ]; then
    fail "Fichier introuvable ou illisible : $WAV_FILE"
  fi
  raw=$(transcribe "$WAV_FILE") || fail "Échec de la transcription."
  processed=$(postprocess "$raw") || fail "Échec du post-traitement."
  show_result "$raw" "$processed"
  exit 0
fi

# Interactive mode: record, transcribe, display; repeat when --loop is set.
while true; do
  record_audio
  if [ ! -f "$TMPWAV" ]; then
    # The recorder produced no file: prompt again.
    echo -e "${RED}Pas d'audio enregistré.${NC}"
    continue
  fi

  raw=$(transcribe "$TMPWAV") || fail "Échec de la transcription."
  if [ -z "$raw" ]; then
    echo -e "${DIM}(silence détecté)${NC}"
    echo
  else
    processed=$(postprocess "$raw") || fail "Échec du post-traitement."
    show_result "$raw" "$processed"
  fi

  # Drop the recording so the next iteration's existence check is meaningful.
  rm -f "$TMPWAV"

  if [ "$LOOP" = false ]; then
    break
  fi
  echo -e "${DIM}Prêt pour le prochain enregistrement... (Ctrl+C pour quitter)${NC}"
done