-
Notifications
You must be signed in to change notification settings - Fork 0
/
whisper-auto
executable file
·259 lines (228 loc) · 7.85 KB
/
whisper-auto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#!/bin/bash
# jaggz.h {who is at} gmail.com

# ourdir: directory containing this script, with symlinks resolved.
ourdir=$(dirname "$(readlink -f "$0")")
# settings_fn: optional per-user settings file that lives beside the script.
settings_fn="${ourdir}/whisper-settings.sh"
# opt_listen_secs: default for the -t option (five minutes, in seconds).
opt_listen_secs=$(( 5 * 60 ))
# opt: first command-line argument, consumed by the option loop below.
opt="$1"
# usage: write the command-line help to stdout. The default shown for -t is
# whatever $opt_listen_secs holds at the time of the call.
usage() {
  printf '%s\n' \
    "Usage: whisper-auto [options]" \
    "Records and uses whisper or whisper on a server to transcribe text" \
    "Options:" \
    "-t # Max time to listen (default is $opt_listen_secs seconds)"
}
# parse_args ARGS...: apply command-line options to the global settings.
#   -t SECS     store SECS in opt_listen_secs
#   -h|--help   print the usage text and exit
# Any other argument, or -t without a value, prints an error to stderr and
# exits with status 1.
parse_args() {
  local opt
  while [[ $# -gt 0 ]]; do
    # Fetch the current option on every pass. Reading it only once before the
    # loop makes every later argument get handled as if it were the first.
    opt=$1
    shift
    case "$opt" in
      -t)
        if [[ $# -eq 0 ]]; then
          echo "Option -t requires a value" >&2
          exit 1
        fi
        opt_listen_secs=$1
        shift
        ;;
      -h|--help) usage; exit ;;
      *) echo "Unknown argument: $opt" >&2; exit 1 ;;
    esac
  done
}
parse_args "$@"
# Leave no positional parameters behind, so files sourced later in the script
# see an empty argument list.
set --
# Color definitions for the messages printed below (presumably where
# $yele, $rste and friends come from -- confirm against ansi.sh).
. "$ourdir/ansi.sh"

# Load the user's settings file when it exists; otherwise explain on stderr
# how to create one and carry on with the built-in defaults.
if [[ -f $settings_fn ]]; then
  . "$ourdir/whisper-settings.sh"
else
  echo "There's no whisper-settings.sh file." >&2
  echo "You should copy whisper-settings--example.sh to whisper-settings.sh to set overrides." >&2
fi
# use_server: 1 = send the recording to our whisper server
#               (ie. cd server-flask && flask run)
#             0 = run whisper directly from this script
# NOTE(review): these assignments come after whisper-settings.sh has been
# sourced, so same-named values set there are overwritten here -- confirm
# that this is intended.
#use_server=0
use_server=1

# Default server address; the host probing below may replace it.
server_host='localhost'
server_port='5000'

# Recording limits handed to the trim effects in record_audio.
rec_max_time_all='30'   # Never more than this
rec_max_time_voice='20' # Final voice/signal portion this length
# Override settings:
# select_whisper_server: walk whisper_server_hosts[] / whisper_server_ports[]
# in order and set server_host / server_port to the first pair whose port
# accepts a connection (nc -z, 1 second timeout).
# Entries with an empty host or port are skipped with a note on stderr instead
# of being handed to nc with a missing argument.
# Returns 0 when a server was selected; returns 1 when none was reachable, in
# which case server_host / server_port are left unchanged.
select_whisper_server() {
  local i testhost testport
  for ((i = 0; i < ${#whisper_server_hosts[@]}; i++)); do
    testhost=${whisper_server_hosts[i]}
    testport=${whisper_server_ports[i]}
    if [[ -z $testhost || -z $testport ]]; then
      echo " ${yele}Skipping incomplete server entry #$i (host='$testhost' port='$testport')$rste" >&2
      continue
    fi
    echo " ${yele}Checking ${whie}$testhost:$testport$rste ..."
    # Fastest test to see if that port is reachable.
    if nc -z -w1 "$testhost" "$testport" 2>/dev/null; then
      server_host=$testhost
      server_port=$testport
      echo " ${bcyae}SUCCESS! Using server ${whie}$server_host:$server_port$rste"
      return 0
    fi
  done
  return 1
}
if [[ ${#whisper_server_hosts[@]} -gt 0 ]]; then
  echo "${yele}whisper-settings.sh provides hosts to examine:$rste"
  select_whisper_server ||
    echo " ${yele}None reachable; keeping ${whie}$server_host:$server_port$rste"
fi
################################################################
# Direct-mode settings.
# This script does not use them when it talks to the server
# (ie. when use_server=1 above), because the server carries its
# own configuration.

# whisper_model: value given to whisper's --model option.
#   Examples from my whisper -h output:
#   tiny.en,tiny,base.en,base,small.en,small,medium.en,medium,
#   large-v1,large-v2,large
whisper_model='large-v2'

# whisper_lang_opts: extra language arguments for whisper.
#   This can be empty:
#whisper_lang_opts=()
whisper_lang_opts=('--language' 'en')

# envact: script sourced to activate the python environment.
#   Only needed when whisper is run directly from this script; with
#   use_server=1 no python is started here, so it can be ignored.
envact=~/venv/whisper/bin/activate

# envpattern: regex matched against the python3 path, so the environment
#   is not re-activated when we are already inside it, as in:
#   if ! which python3 | grep "$envpattern" >/dev/null; then
envpattern='venv/whisper'
################################################################
# From here on the settings apply whether or not the server is used.
################################################################
# Unless you're developing, you probably don't need to change
# anything below.

# dd: desktop dir. Holds the icons that get renamed to show status
#     (and is usually where these scripts are launched from).
dd=~/Desktop

# Name prefixes of the launcher entries looked up in $dd.
bin_wauto='whisper-auto'
bin_wkill='whisper-kill-rec'

# cachedir: where recordings, transcripts and the log are kept
#           (used relative to the script directory).
cachedir='cache'

# Icon names used for each state, and the label of the kill launcher.
stat_done="WH-Done"
stat_rec="WH-Rec"
stat_wh="WH-Whispering"
kill_name='End Rec'

# fn_rec_pid: Internal use. Holds the record command's PID so that
#             whisper-kill-rec can kill it. If you change this, change
#             it in whisper-kill-rec too. No need to touch, probably.
fn_rec_pid='/tmp/whisper-auto-rec.pid'
# fn_ui_pid: PID file of the status UI. Must match ui source file!
fn_ui_pid='/tmp/whisper-auto-ui.pid'

# Mention it when a status UI has left its PID file behind.
if [[ -e $fn_ui_pid ]]; then
  echo " ${yele}Found UI PID File$rste"
fi
# _set_status_ui_sig SIGNAL: send SIGNAL (e.g. -USR1) to the process whose PID
# is stored in $fn_ui_pid.
# Returns 0 without doing anything when the PID file does not exist: no UI is
# not an error, and set_status_ui_rdy is the script's last command, so this
# status becomes the script's exit status.
# Returns 1 (with a note on stderr) when the file does not hold a numeric PID;
# otherwise returns kill's status.
_set_status_ui_sig () {
  local ui_pid
  [[ -e $fn_ui_pid ]] || return 0
  # read trims surrounding whitespace; its status is ignored because a file
  # without a trailing newline still fills ui_pid.
  read -r ui_pid _ < "$fn_ui_pid"
  if [[ ! $ui_pid =~ ^[0-9]+$ ]]; then
    echo "Ignoring UI PID file '$fn_ui_pid': no valid PID in it" >&2
    return 1
  fi
  kill "$1" "$ui_pid"
}
# One wrapper per UI state; each sends its own signal.
set_status_ui_rec () { _set_status_ui_sig -USR1; }  # recording
set_status_ui_whi () { _set_status_ui_sig -USR2; }  # whispering
set_status_ui_rdy () { _set_status_ui_sig -HUP; }   # ready
# Choosing a record command:
#  - Any recorder works for your system; whisper converts whatever it is
#    given to the 16000 Hz, s16le, mono it needs.
#  - sox's 'rec' has a strange delay on my system.
#  - Older function-based variants, kept for reference (they would need
#    updating to match the current recording method):
# record_command () { rec "$1"; }
#cmd_record () { arecord -f S16_LE -r 16000 "$1"; }
#
# An alias is used instead of a function because the function's subshell
# got in the way of grabbing the background pid (we ended up with
# whisper-auto's own pid). The alias requires the filename to be the last
# argument; a command that needs it elsewhere has to be written directly
# into record_audio().
# NOTE(review): record_audio() below currently calls rec itself and does not
# reference this alias.
alias cmd_record='arecord -f S16_LE -r 16000'
# record_audio FILE [DUR]: record from the default input into FILE (wav) with
# sox's `rec`, and return once the recorder has exited.
#   FILE  output path for the recording
#   DUR   accepted for compatibility; not used by the current rec command
# Globals read: rec_max_time_all, rec_max_time_voice (trim lengths),
#               fn_rec_pid (file that receives the recorder's PID),
#               bgree, rste (message colors).
# SIGINT/SIGTERM received while recording are turned into a SIGTERM for the
# recorder; both traps are reset before returning.
record_audio () {
  local rec_pid
  local -a silence_settings=(silence 1 0.50 0.6% 1 0:01.5 2.6%)

  ###############################
  ######## RECORDING PROGRAM HERE
  # arecord -f S16_LE -r 16000 -d "$opt_listen_secs" "$1" &
  rec -r 16000 -b 16 -e signed-integer -t wav "$1" \
    trim 0 "$rec_max_time_all" \
    "${silence_settings[@]}" \
    trim 0 "$rec_max_time_voice" \
    &
  rec_pid=$!
  ###############################

  echo "recorder's pid is: $rec_pid"
  # Published so that whisper-kill-rec can end the recording from outside.
  echo "$rec_pid" > "$fn_rec_pid"
  echo "Recording to: $1"
  sleep .1
  echo
  echo " ${bgree}-- RECORDING. Hit CTRL-C to Finish --$rste"

  # The handler sees rec_pid because it runs while this function is active.
  handle_sigint() {
    echo "Caught SIGINT or SIGTERM; terminating recording."
    kill -TERM "$rec_pid" 2>/dev/null
  }
  trap handle_sigint SIGINT # Catch signal
  trap handle_sigint SIGTERM

  wait "$rec_pid"
  # A trapped signal makes `wait` return at once, before the recorder has
  # exited. Keep waiting until it is really gone so the wav file is complete
  # before anything reads it.
  while kill -0 "$rec_pid" 2>/dev/null; do
    wait "$rec_pid"
    # 127 means it is no longer our child; do not spin on it.
    (( $? == 127 )) && break
  done
  #rm "$fn_rec_pid" # Clean up tmp pid file
  echo "Continuing to process audio..."
  trap - SIGINT # reset the trap
  trap - SIGTERM # reset the trap
}
# rename_first_match PREFIX TARGET
# Expands PREFIX* and, when the first match exists and is not already named
# TARGET, moves it to TARGET. Does nothing when there is no match.
# Returns mv's status when a move is attempted, 0 otherwise.
rename_first_match () {
  local prefix=$1
  local dest=$2
  local -a candidates=("$prefix"*)
  local first=${candidates[0]}

  # An unmatched glob stays as the literal pattern, which does not exist.
  [[ -e $first ]] || return 0
  [[ $first == "$dest" ]] && return 0
  mv "$first" "$dest"
}
# Initial renaming: the first entry in $dd whose name starts with
# $bin_wauto becomes the "done" status icon, and the first one starting
# with $bin_wkill gets the $kill_name label.
# ls -l "$dd/$bin_wauto"*
rename_first_match "$dd/$bin_wauto" "$dd/$stat_done"
rename_first_match "$dd/$bin_wkill" "$dd/$kill_name"
# ensure_whisper_env: make sure python3 comes from the whisper environment.
# When the path reported for python3 does not match $envpattern, source
# $envact to activate the environment.
# Returns 0 when the environment is (now) active; on activation failure
# prints a message to stderr and returns 1.
ensure_whisper_env() {
  if command -v python3 | grep -q -- "$envpattern"; then
    return 0
  fi
  echo "Not in whisper venv. Activating now..."
  if ! . "$envact"; then
    # printf, because echo would print the \n literally.
    printf "Couldn't activate env:\n%s\n" "$envact" >&2
    return 1
  fi
}
# The python environment is only needed when whisper runs from this script.
if [[ $use_server = 0 ]]; then
  ensure_whisper_env || exit 1
fi
# Work from the script's directory; $cachedir is used relative to it.
cd "$ourdir" || { echo "Error: cannot cd to '$ourdir'" >&2; exit 1; }
[[ -d $cachedir ]] || mkdir -p -- "$cachedir" ||
  { echo "Error: cannot create cache directory '$cachedir'" >&2; exit 1; }

# Files for the current run, and where the previous run's files are kept.
wav="$cachedir/new.wav"
wavprior="$cachedir/new-prior.wav"
output="$cachedir/new.txt"
outputprior="$cachedir/new-prior.txt"
log="$cachedir/whisper.log"

# Keep one generation of history: the last recording and transcript are moved
# aside before new ones are written.
[[ -e $wav ]] && mv -- "$wav" "$wavprior"
[[ -e $output ]] && mv -- "$output" "$outputprior"
# set_status_icons NAME: rename the first WH-* entry in $dd to $dd/NAME so the
# desktop icon shows the current state.
# Does nothing, and returns 0, when there is no WH-* entry or when it already
# has the requested name (which would otherwise make mv complain that source
# and destination are the same file). Otherwise returns mv's status.
set_status_icons () {
  local new_name=$1
  local -a icons=("$dd"/WH-*)
  local current=${icons[0]}

  # An unmatched glob stays as the literal pattern, which does not exist.
  [[ -e $current ]] || return 0
  [[ $current == "$dd/$new_name" ]] && return 0
  mv -- "$current" "$dd/$new_name"
}
# Switch the desktop icon and the UI (if one is running) to the recording
# state. Both are started in the background so the recorder is not held up.
set_status_icons "$stat_rec" &
set_status_ui_rec &
# Record into $wav; this does not return until record_audio has finished
# waiting on the recorder.
record_audio "$wav"
# Recording is over: show the whispering state while the audio is transcribed.
set_status_icons "$stat_wh"
set_status_ui_whi
# copy_output_to_clipboard: put the transcript stored in $output on the X
# selections as one line of text.
# CR/LF become spaces and awk's field re-split collapses every run of
# whitespace into a single space and trims both ends.
# The text is offered to `xsel -i -b`, `xclip` and `xclip -sel clip`; tools
# that are not installed are skipped.
# Returns 0 and prints the confirmation when at least one copy succeeded;
# otherwise reports the failure on stderr and returns 1.
copy_output_to_clipboard () {
  local text
  local copied=0

  if [[ ! -r $output ]]; then
    echo " Cannot read '$output'; nothing copied to clipboard" >&2
    return 1
  fi
  text=$(tr '\r\n' '  ' < "$output" | awk '{$1=$1};1')

  if command -v xsel >/dev/null 2>&1; then
    printf '%s' "$text" | xsel -i -b && copied=1
  fi
  if command -v xclip >/dev/null 2>&1; then
    printf '%s' "$text" | xclip && copied=1
    printf '%s' "$text" | xclip -sel clip && copied=1
  fi

  if (( copied == 0 )); then
    echo " Could not copy to clipboard (xsel/xclip missing or failed)" >&2
    return 1
  fi
  echo " ^ Copied to clipboard"
}
# transcribe_recording: turn the recording in $wav into text in $output, show
# the result and copy it to the clipboard.
#   use_server=0  run whisper locally; it writes its txt output into $cachedir
#                 and its console output into $log.
#   otherwise     upload $wav to $server_host:$server_port with curl and store
#                 the response body in $output.
# Returns non-zero, without touching $output or the clipboard, when there is
# no recording, when whisper produced no output file, or when the server
# request failed.
transcribe_recording() {
  local result

  if [[ ! -f $wav ]]; then
    echo " ${brede}No recording found at '$wav'; nothing to transcribe.$rste" >&2
    return 1
  fi

  if [[ $use_server = 0 ]]; then
    echo "Running whisper..."
    whisper --model "$whisper_model" --task transcribe \
      "${whisper_lang_opts[@]}" \
      --output_dir "$cachedir" \
      --output_format txt \
      "$wav" &>"$log"
    echo "Done."
    # Report a missing transcript instead of trying to display it.
    if [[ ! -f $output ]]; then
      echo " ${brede}Output not created?$rste"
      return 1
    fi
    echo "File '$output' created:"
    echo " $(ls -lgG "$output")"
    printf "Result:\n ${yele}%s$rste\n" "$(cat "$output")"
  else
    # curl's own exit status is checked so that an unreachable server does not
    # overwrite the transcript and the clipboard with an empty result.
    if ! result=$(curl -s -F "file=@$wav" "$server_host:$server_port"); then
      echo " ${brede}Request to $server_host:$server_port failed$rste" >&2
      return 1
    fi
    printf "Result:\n ${yele}%s$rste\n" "$result"
    echo "Storing in file $output"
    printf '%s\n' "$result" > "$output"
  fi
  copy_output_to_clipboard
}
transcribe_recording
# Processing is finished: rename the desktop icon to the "done" name and send
# the UI (if one is running) its ready signal.
set_status_icons "$stat_done"
set_status_ui_rdy