whisper-auto

#!/bin/bash
# jaggz.h {who is at} gmail.com
# ourdir: directory holding this script (symlinks resolved); sibling files
#         such as the settings file are looked up relative to it.
ourdir=$(readlink -f "$0")
ourdir=$(dirname "$ourdir")
settings_fn="${ourdir}/whisper-settings.sh"

# Default maximum listen time in seconds (5 minutes); -t replaces it.
opt_listen_secs=$(( 5 * 60 ))

# First command-line argument, consumed by the option handling below.
opt=$1
# usage: print the command-line help to stdout.
#   Reads opt_listen_secs at call time to show the current -t default.
usage () {
	printf '%s\n' \
		'Usage: whisper-auto [options]' \
		'Records and uses whisper or whisper on a server to transcribe text' \
		'Options:' \
		"   -t #   Max time to listen (default is $opt_listen_secs seconds)"
}

# parse_args ARGS...
#   Process the command-line options:
#     -t SECS     set opt_listen_secs
#     -h, --help  print usage and exit
#   Anything else is reported on stderr and the script exits with status 1.
#   The option under inspection is re-read on every iteration; previously it
#   was captured once before the loop, so every argument after the first was
#   handled as if it were a repeat of the first.
parse_args () {
	local opt
	while [[ $# -gt 0 ]]; do
		opt=$1
		shift
		case "$opt" in
			-t)
				# -t needs a value; do not silently store an empty one
				if [[ $# -eq 0 ]]; then
					echo "Option -t requires a number of seconds" >&2
					exit 1
				fi
				opt_listen_secs=$1
				shift
				;;
			-h|--help)
				usage
				exit
				;;
			*)
				echo "Unknown argument: $opt" >&2
				exit 1
				;;
		esac
	done
}
parse_args "$@"

# Pull in the ANSI helper file ("get some colors in here"); presumably this
# is where the color variables used in later messages come from.
source "$ourdir/ansi.sh"

# Load the user's settings file when it exists; otherwise tell the user how
# to create one and carry on.
if [[ -f $settings_fn ]]; then
	source "$ourdir/whisper-settings.sh"
else
	echo "There's no whisper-settings.sh file." >&2
	echo "You should copy whisper-settings--example.sh to whisper-settings.sh to set overrides." >&2
fi

# use_server: If you're running our whisper server
#             (ie. cd server-flask && flask run)
#             Set to 0 to disable, and we'll run whisper directly
#
# These are defaults only: each one is assigned just when it is still unset
# or empty, so a value already provided by the sourced whisper-settings.sh
# is kept instead of being overwritten here.
: "${use_server:=1}"
: "${server_host:=localhost}"
: "${server_port:=5000}"
: "${rec_max_time_all:=30}"      # Never more than this
: "${rec_max_time_voice:=20}"    # Final voice/signal portion this length

# Override settings:
# select_whisper_server
#   When the whisper_server_hosts array is non-empty, probe each host with
#   `nc -z -w1` in array order and point server_host/server_port at the
#   first one that accepts a connection. If none answers, server_host and
#   server_port are left as they were.
#   A host with no matching whisper_server_ports entry is probed on the
#   current server_port.
select_whisper_server () {
	local i testhost testport
	[[ ${#whisper_server_hosts[@]} -gt 0 ]] || return 0

	echo "${yele}whisper-settings.sh provides hosts to examine:$rste"
	if ! command -v nc >/dev/null; then
		# Without nc every probe would fail silently; say why nothing is picked
		echo "  nc not found; cannot probe hosts, keeping $server_host:$server_port" >&2
		return 0
	fi
	for i in "${!whisper_server_hosts[@]}"; do
		# Fastest test to see if that port is reachable, and set
		# server_host and _port for the first one found
		testhost=${whisper_server_hosts[i]}
		testport=${whisper_server_ports[i]:-$server_port}
		echo "  ${yele}Checking ${whie}$testhost:$testport$rste ..."
		if nc -z -w1 "$testhost" "$testport" 2>/dev/null; then
			server_host=$testhost
			server_port=$testport
			echo "    ${bcyae}SUCCESS! Using server ${whie}$server_host:$server_port$rste"
			return 0
		fi
	done
}
select_whisper_server

################################################################
# The following are not used by this script if you are using
# the server (ie. if use_server=1 above), because that's
# configured at the server

	# The values below are defaults only: anything already set (for example
	# by the sourced whisper-settings.sh) is kept rather than overwritten.

	# whisper_model: Examples from my whisper -h output:
	#  tiny.en,tiny,base.en,base,small.en,small,medium.en,medium,
	#    large-v1,large-v2,large
	[[ -n ${whisper_model:-} ]] || whisper_model=large-v2
	# whisper_lang_opts: extra options passed to whisper. This can be an
	#   empty array, so the default is applied only when the variable has
	#   not been declared at all.
	if ! declare -p whisper_lang_opts &>/dev/null; then
		whisper_lang_opts=(--language en)
	fi

	# envact: Script to activate environment.
	#         THIS IS NOT USED IF YOU'RE USING THE SERVER, BECAUSE
	#         WE AREN'T USING PYTHON FROM THIS SCRIPT UNLESS WE'RE
	#         RUNNING WHISPER DIRECTLY.
	#         This means, if you have use_server=1 above, you can
	#         ignore these lines.
	[[ -n ${envact:-} ]] || envact=~/venv/whisper/bin/activate
	# envpattern: Regex used to test the python path so we don't have
	#             to re-init the env if we're in it
	[[ -n ${envpattern:-} ]] || envpattern=venv/whisper


################################################################
# Now we're back at things that affect us regardless of server
# mode or not...

################################################################
################################################################
################################################################
# Unless you're developing, you probably don't need to
# change anything below

# dd ("desktop dir"): where the launcher/status icons live; they get
#     renamed to show the current status (and are usually what starts
#     these scripts).
dd=~/Desktop

# Name prefixes of the launcher files looked up in $dd.
bin_wkill=whisper-kill-rec
bin_wauto=whisper-auto

# Relative directory for the recording, transcript and log files.
cachedir=cache

# Icon names used for each status, plus the name given to the kill launcher.
kill_name='End Rec'
stat_wh=WH-Whispering
stat_rec=WH-Rec
stat_done=WH-Done

# fn_ui_pid:  PID file of the status UI. Must match ui source file!
fn_ui_pid=/tmp/whisper-auto-ui.pid
# fn_rec_pid: Internal use. PID of our record command, so that
#             whisper-kill-rec can kill it. If you change this, change
#             it in whisper-kill-rec too. No need to touch, probably.
fn_rec_pid=/tmp/whisper-auto-rec.pid

# Mention it when the UI's PID file is present (presumably meaning the
# status UI is running and will receive our status signals).
if [[ -e $fn_ui_pid ]]; then
	echo " ${yele}Found UI PID File$rste"
fi
# _set_status_ui_sig SIGNAL
#   Send SIGNAL (a kill option such as -USR1) to the PID stored in
#   $fn_ui_pid. This is best-effort: with no PID file, or a file that does
#   not hold a number, nothing is sent and the function still succeeds, so
#   a missing UI never turns into a failure status for the caller (the
#   script ends with one of these calls). A failed kill is reported on
#   stderr only.
_set_status_ui_sig () {
	local ui_pid
	[[ -r $fn_ui_pid ]] || return 0
	ui_pid=$(<"$fn_ui_pid")
	ui_pid=${ui_pid//[[:space:]]/}
	# Never run kill with an empty or garbage target
	[[ $ui_pid =~ ^[0-9]+$ ]] || return 0
	kill "$1" "$ui_pid" 2>/dev/null ||
		echo "Could not signal status UI (pid $ui_pid, signal $1)" >&2
	return 0
}
# Status notifications for the UI: recording / whispering / ready.
set_status_ui_rec () { _set_status_ui_sig -USR1; }
set_status_ui_whi () { _set_status_ui_sig -USR2; }
set_status_ui_rdy () { _set_status_ui_sig -HUP; }


# You can pick a different record command for your system.
# Note: Whisper will convert anything you give it to
#   its needed   16000 Hz, s16le, mono
# sox's 'rec' has a strange delay on my system
# This would need to be updated to match the arecord method below
# record_command () { rec "$1"; }

#cmd_record () { arecord -f S16_LE -r 16000 "$1"; }
# We're using an alias now because the subshell from the fn() method was
# screwing up our grabbing the bg pid (it was taking our whisper-auto's pid)
# If you need a command that has the filename other than at the end (as the
# alias requires), then you'll have to put the command directly in record_audio()
# instead of using the alias
# NOTE(review): record_audio() in this file invokes `rec` directly and does
#   not reference cmd_record, so this alias currently looks unused. Also,
#   bash only expands aliases in scripts when `shopt -s expand_aliases` is
#   set, which this file does not do -- confirm before relying on it.
alias cmd_record="arecord -f S16_LE -r 16000"

# record_audio WAVFILE [DURATION]
#   Start sox's `rec` in the background writing WAVFILE, store its PID in
#   $fn_rec_pid (the file whisper-kill-rec uses to stop the recording), and
#   block until the recorder has exited.
#   A SIGINT or SIGTERM received meanwhile is forwarded to the recorder as
#   SIGTERM instead of ending this script.
#   DURATION ($2) is still accepted but is not used.
#   Reads globals: rec_max_time_all, rec_max_time_voice, fn_rec_pid,
#                  bgree, rste
record_audio () {
	local rec_pid
	local -a silence_settings=(silence 1 0.50 0.6% 1 0:01.5 2.6%)

	###############################
	######## RECORDING PROGRAM HERE
	# arecord -f S16_LE -r 16000 -d "$opt_listen_secs" "$1" &
	# Effects chain: cap at rec_max_time_all, apply the silence effect,
	# then cap what remains at rec_max_time_voice.
	rec -r 16000 -b 16 -e signed-integer -t wav "$1" \
		trim 0 "$rec_max_time_all" \
		"${silence_settings[@]}" \
		trim 0 "$rec_max_time_voice" \
		&
	rec_pid=$!
	###############################

	handle_sigint() {
		echo "Caught SIGINT or SIGTERM; terminating recording."
		kill -TERM "$rec_pid"
	}
	# Install the handlers right away so an early CTRL-C stops the
	# recorder rather than this script.
	trap handle_sigint SIGINT SIGTERM

	echo "recorder's pid is: $rec_pid"
	echo "$rec_pid" > "$fn_rec_pid"
	echo "Recording to: $1"
	sleep .1
	echo
	echo "	${bgree}-- RECORDING. Hit CTRL-C to Finish --$rste"

	# `wait` returns early when a trapped signal arrives, before the
	# recorder has actually exited. Keep waiting until the process is gone
	# so the WAV file is complete before it gets processed.
	while kill -0 "$rec_pid" 2>/dev/null; do
		wait "$rec_pid"
	done
	#rm "$fn_rec_pid" # Clean up tmp pid file
	echo "Continuing to process audio..."
	trap - SIGINT SIGTERM      # reset the traps
}

# Initial renaming
# rename_first_match PREFIX TARGET
#   Rename the first path matching PREFIX* to TARGET. Nothing happens when
#   no such path exists or when the first match already is TARGET.
rename_first_match () {
	local prefix=$1 dest=$2
	local -a candidates=("$prefix"*)
	local first=${candidates[0]}

	# An unmatched glob stays as the literal pattern, which does not exist
	[[ -e $first ]] || return 0
	[[ $first != "$dest" ]] || return 0
	mv "$first" "$dest"
}
# ls -l "$dd/$bin_wauto"*
# Give the desktop entries their status names: the first entry starting with
# $bin_wauto becomes $stat_done, and the first one starting with $bin_wkill
# becomes $kill_name. Both calls do nothing when no such entry exists.
rename_first_match "$dd/$bin_wauto" "$dd/$stat_done"
rename_first_match "$dd/$bin_wkill" "$dd/$kill_name"

# activate_whisper_env
#   Only when whisper is run directly (use_server is 0): unless the python3
#   found on PATH already matches $envpattern, source the activation script
#   $envact. If that fails, the reason goes to stderr and the script exits
#   with status 1.
activate_whisper_env () {
	[[ $use_server = 0 ]] || return 0
	if command -v python3 | grep -q -- "$envpattern"; then
		return 0  # already inside the environment
	fi
	echo "Not in whisper venv. Activating now..."
	if ! . "$envact"; then
		printf "Couldn't activate env:\n%s\n" "$envact" >&2
		exit 1
	fi
}
activate_whisper_env

# Work from the script's directory so the relative cache paths below resolve.
cd "$ourdir" || { echo "Error: cannot cd to '$ourdir'" >&2; exit 1; }
[[ -d $cachedir ]] || mkdir -p -- "$cachedir" ||
	{ echo "Error: cannot create cache directory '$cachedir'" >&2; exit 1; }

# Files for this run, and where the previous run's files are kept.
wav="$cachedir/new.wav"
wavprior="$cachedir/new-prior.wav"
output="$cachedir/new.txt"
outputprior="$cachedir/new-prior.txt"
log="$cachedir/whisper.log"

# Keep one generation of history: move the last recording/transcript aside.
if [[ -e $wav ]]; then
	mv -- "$wav" "$wavprior"
fi
if [[ -e $output ]]; then
	mv -- "$output" "$outputprior"
fi

# set_status_icons NAME
#   Rename the first "$dd"/WH-* entry to "$dd"/NAME so the desktop icon
#   shows the current status. Does nothing when there is no WH-* entry or
#   when it already carries that name.
set_status_icons () {
	local new_name=$1
	local -a icons=("$dd"/WH-*)
	local current=${icons[0]}

	# An unmatched glob stays as the literal pattern, which does not exist
	[[ -e $current ]] || return 0
	# mv would complain about renaming a file onto itself
	[[ $current != "$dd/$new_name" ]] || return 0
	mv -- "$current" "$dd/$new_name"
}
# Show the "recording" status. Both updates are started in the background
# (&), so the recording call below does not wait for them.
set_status_icons "$stat_rec" &
set_status_ui_rec &
# Record into $wav; this returns once the recording step is over.
record_audio "$wav"
# Recording finished: switch icon and UI to the "whispering" status.
set_status_icons "$stat_wh"
set_status_ui_whi

# copy_output_to_clipboard
#   Flatten the transcript in $output to a single line (CR/LF become
#   spaces, whitespace runs collapse to one space, ends trimmed) and hand
#   it to xsel (clipboard) and xclip (default selection and clipboard),
#   using whichever of the two tools is available.
#   Returns 1, with a message on stderr, when $output cannot be read or
#   when nothing could be copied.
copy_output_to_clipboard () {
	local text copied=0

	if [[ ! -r $output ]]; then
		echo " ${brede}Cannot read '$output'; nothing copied$rste" >&2
		return 1
	fi
	# awk's field re-join ($1=$1) does the squeezing and trimming
	text=$(tr '\r\n' '  ' < "$output" | awk '{$1=$1};1' | tr -d '\r\n')

	if command -v xsel >/dev/null; then
		printf '%s' "$text" | xsel -i -b && copied=1
	fi
	if command -v xclip >/dev/null; then
		printf '%s' "$text" | xclip && copied=1
		printf '%s' "$text" | xclip -sel clip && copied=1
	fi

	if (( copied )); then
		echo " ^ Copied to clipboard"
	else
		echo " ${brede}Could not copy to clipboard (xsel/xclip missing or failed)$rste" >&2
		return 1
	fi
}

# Transcribe $wav, either by running whisper here (use_server is 0) or by
# posting the file to the whisper server, then show the text, keep it in
# $output and copy it to the clipboard.
if [[ $use_server = 0 ]]; then
	echo "Running whisper..."
	# All whisper output goes to $log
	whisper --model "$whisper_model" --task transcribe \
		"${whisper_lang_opts[@]}" \
		--output_dir "$cachedir" \
		--output_format txt \
		"$wav" &>"$log"
	echo "Done."
	# Only report and copy a transcript that actually exists
	if [[ ! -f $output ]]; then
		echo " ${brede}Output not created?$rste"
		echo "  See the log: $log"
	else
		echo "File '$output' created:"
		echo "  $(ls -lgG "$output")"
		printf "Result:\n ${yele}%s$rste\n" "$(cat "$output")"
		copy_output_to_clipboard
	fi
else
	# A failed request must not be stored or copied as if it were a result
	if result=$(curl -s -F "file=@$wav" "$server_host:$server_port"); then
		printf "Result:\n ${yele}%s$rste\n" "$result"
		echo "Storing in file $output"
		printf '%s\n' "$result" > "$output"
		copy_output_to_clipboard
	else
		echo " ${brede}Request to whisper server $server_host:$server_port failed$rste" >&2
	fi
fi

# Back to the idle status for the desktop icon and the UI.
set_status_icons "$stat_done"
set_status_ui_rdy