-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
92ff77a
commit c6ce0dd
Showing
12 changed files
with
389 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
const { logLevel, loadModel, transcript, freeModel } = require('../voskjs')

const { spellingEnglishCharacters } = require('./spellingEnglishCharacters')
// The Italian grammar is exported by its own module; requiring it from the
// English module would leave the binding undefined.
const { spellingItalianCharacters } = require('./spellingItalianCharacters')


/**
 * Transcribe a single audio file with a Vosk model, passing a grammar
 * (a list of allowed phrases) to the recognizer.
 *
 * The model directory, the audio file and the grammar are selected by
 * (un)commenting one of the alternatives listed in the body.
 *
 * Errors raised while transcribing are logged, and the model is released
 * whether or not the transcript succeeded.
 *
 * @see https://alphacephei.com/vosk/adaptation
 * @returns {Promise<void>}
 */
async function main() {

  //const grammar = undefined
  const grammar = spellingEnglishCharacters
  //const grammar = spellingItalianCharacters

  // English language dynamic graph small model
  const modelDirectory = '../models/vosk-model-small-en-us-0.15'
  //const modelDirectory = '../models/vosk-model-en-us-aspire-0.2'

  // Italian language dynamic graph small model
  //const modelDirectory = '../models/vosk-model-small-it-0.4'

  //const audioFile = '../audio/2830-3980-0043.wav' // -> experience proves this
  //const audioFile = '../audio/4507-16021-0012.wav' // -> why should one hold on the way
  //const audioFile = '../audio/8455-210777-0068.wav' // -> your power is sufficient i said

  // English language ISO6346 samples
  const audioFile = '../audio/EN_CSQU3054383.wav' // -> charlie for c ...

  // Italian language ISO6346 samples
  //const audioFile = '../audio/IT_CSQU3054383.wav'
  //const audioFile = '../audio/IT_CSQU3054383_long.wav'
  //const audioFile = '../audio/IT_RAIU_690011_4_25_U1.wav'

  console.log(`model directory      : ${modelDirectory}`)
  console.log(`speech file name     : ${audioFile}`)
  console.log(`grammar              : ${grammar}`)

  // set the vosk log level to silence
  logLevel(-1)

  // load in memory a Vosk directory model
  const { model, latency } = await loadModel(modelDirectory)

  console.log(`load model latency   : ${latency}ms`)

  // speech recognition of an audio file
  try {
    const { result, latency } = await transcript(audioFile, model, {grammar})

    console.log( result )
    console.log(`transcript latency : ${latency}ms`)
  }
  catch (error) {
    console.error(error)
  }
  finally {
    // free the Vosk runtime model, even if logging above failed
    freeModel(model)
  }
}

// Do not leave the promise floating: a failure outside the inner try block
// (e.g. while loading the model) is reported and reflected in the exit code.
main().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
// Spoken forms of the ten decimal digits.
const englishDigitWords = [
  'zero', 'one', 'two', 'three', 'four',
  'five', 'six', 'seven', 'eight', 'nine',
]

// Spoken forms of the 26 letters, each phrased as "<code word> for <letter>".
const englishLetterPhrases = [
  'alfa for a',
  'bravo for b',
  'charlie for c',
  'delta for d',
  'echo for e',
  'foxtrot for f',
  'golf for g',
  'hotel for h',
  'india for i',
  'juliet for j',
  'kilo for k',
  'lima for l',
  'mike for m',
  'november for n',
  'oscar for o',
  'papa for p',
  'quebec for q',
  'romeo for r',
  'sierra for s',
  'tango for t',
  'uniform for u',
  'victor for v',
  'whiskey for w',
  'x ray for x',
  'yankee for y',
  'zulu for z',
]

/*
 * Candidate symbol phrases, currently NOT part of the grammar:
 *
 * 'space',
 * 'tab',
 * 'point',
 * 'comma',
 * 'semicolon',
 * 'colon',
 * 'exclamation mark',
 * 'question mark',
 * 'tick',
 * 'backtick',
 * 'quotation mark',
 * 'apostrophe',
 * 'acute accent',
 * 'grave accent',
 * 'closing_inclined_quotes',
 * 'opening_inclined_quotes',
 * 'opening double quotes',
 * 'closing double quotes',
 * 'opening round bracket',
 * 'closing round bracket',
 * 'opening square bracket',
 * 'closing square bracket',
 * 'opening curly bracket',
 * 'closing curly bracket',
 * 'at sign',
 * 'asterisk_symbol',
 * 'hash',
 * 'percent sign',
 * 'vertical_bar',
 * 'slash',
 * 'backslash',
 * 'lira sign',
 * 'dollar sign',
 * 'ampersand',
 * 'caret',
 * 'equal sign',
 * 'dash',
 * 'plus sign',
 * 'greater-than sign',
 * 'less-than sign',
 * 'tilde',
 * 'underscore',
 * 'penny sign',
 * 'copyright sign',
 * 'division sign',
 * 'micron sign',
 * 'paragraph delimiter',
 * 'more or less sign',
 * 'trademark symbol',
 * 'section delimiter',
 * 'trademark sign',
 * 'japanese yen sign',
 * 'inverted question mark',
 * 'inverted exclamation mark'
 */

/**
 * Phrase list for spelling characters in English: the digits first, then the
 * letters, in that order.
 */
const spellingEnglishCharacters = [
  ...englishDigitWords,
  ...englishLetterPhrases,
]
|
||
module.exports = { spellingEnglishCharacters } | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
// Spoken forms of the ten decimal digits.
const italianDigitWords = [
  'zero', 'uno', 'due', 'tre', 'quattro',
  'cinque', 'sei', 'sette', 'otto', 'nove',
]

// Spoken forms of the 26 letters, mostly phrased as "<letter name> come <word>".
const italianLetterPhrases = [
  'a come Ancona',
  'bi come Bologna',
  'ci come Cagliari',
  'di come Domodossola',
  'é come Empoli',
  'èffè come Firenze',
  'gi come Genova',
  'àcca come Hotel',
  'i come Imperia',
  'i lùnga come Jolly',
  'càppa come kursaal',
  'èllè come Livorno',
  'èmmè come Milano',
  'ènnè come Napoli',
  'ò come Otranto',
  'pi come Palermo',
  'cu come Quarto',
  'èrrè come Roma',
  'èssè come Savona',
  'ti come Torino',
  'u come Udine',
  'vu come Venezia',
  'vu dóppia come Washington',
  'ics come Xilofono',
  'ìpsilon',
  'zèta come Zara',
]

// Vowels carrying an acute or a grave accent.
const italianAccentedVowelPhrases = [
  'á con accento acuto',
  'à con accento grave',
  'é con accento acuto',
  'è con accento grave',
  'í con accento acuto',
  'ì con accento grave',
  'ó con accento acuto',
  'ò con accento grave',
  'ú con accento acuto',
  'ù con accento grave',
]

/*
 * Candidate symbol phrases, currently NOT part of the grammar. They are kept
 * verbatim in Italian because they are grammar strings, not prose:
 *
 * 'spazio',
 * 'tabulazione',
 * 'punto',
 * 'punto centrale',
 * 'virgola',
 * 'punto e virgola',
 * 'due punti',
 * 'punto esclamativo',
 * 'punto interrogativo',
 * 'virgoletta destra inclinata',
 * 'virgolette',
 * 'apostrofo',
 * 'accento acuto',
 * 'accento grave',
 * 'virgolette destre inclinate',
 * 'virgolette sinistre inclinate',
 * 'virgolette doppie aperte',
 * 'virgolette doppie chiuse',
 * 'parentesi tonda aperta',
 * 'parentesi tonda chiusa',
 * 'parentesi quadra aperta',
 * 'parentesi quadra chiusa',
 * 'parentesi graffa aperta',
 * 'parentesi graffa chiusa',
 * 'chiocciola',
 * 'simbolo asterisco',
 * 'simbolo cancelletto',
 * 'simbolo percento',
 * 'barra verticale',
 * 'barra',
 * 'barra retroversa',
 * 'simbolo valuta lira',
 * 'simbolo valuta dollaro',
 * 'simbolo e commerciale',
 * 'simbolo cappelletto',
 * 'simbolo uguale',
 * 'trattino',
 * 'simbolo più',
 * 'simbolo maggiore',
 * 'simbolo minore',
 * 'tilde',
 * 'trattino lungo',
 * 'sottolineato',
 * 'simbolo di centesimo',
 * 'simbolo di copyright',
 * 'simbolo di divisione',
 * 'simbolo micron',
 * 'delimitatore di paragrafo',
 * 'simbolo più o meno',
 * 'simbolo di marchio registrato',
 * 'delimitatore di sezione',
 * 'simbolo trademark',
 * 'simbolo valuta Yen Giapponese',
 * 'punto di domanda invertito',
 * 'punto esclamativo invertito',
 */

/**
 * Phrase list for spelling characters in Italian: the digits first, then the
 * letters, then the accented vowels, in that order.
 */
const spellingItalianCharacters = [
  ...italianDigitWords,
  ...italianLetterPhrases,
  ...italianAccentedVowelPhrases,
]
|
||
module.exports = { spellingItalianCharacters } | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash

#
# Play the audio file given as first argument.
#
# Manual equivalents:
#   ffplay -nodisp -autoexit -hide_banner -loglevel panic audio/mi_chiamo_giorgio.mp3.opus
#   opusdec --force-wav --quiet audio/mi_chiamo_giorgio.mp3.opus - | aplay
#

# Without arguments, print a short usage text framed by blank lines and stop.
if (( $# == 0 )); then
  printf '\n%s\n%s\n\n' "play an audio file" "usage: $0 <audiofile>"
  exit
fi

ffplay -nodisp -autoexit -hide_banner -loglevel panic "$1"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
#!/bin/bash | ||
|
||
# sudo apt-get install sox ffmpeg | ||
|
||
if [ $# -eq 0 ] | ||
then | ||
echo "usage : $0 filename (without suffix)" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/bash

# sudo apt-get install libopus0 opus-tools ffmpeg

#
# Convert an audio file to a mono, 16 bit signed PCM WAV file.
#
#   $1  input audio file
#   $2  output WAV file (overwritten if it already exists, see -y)
#
# ffmpeg runs with "-loglevel panic", so without this check a missing
# argument would fail without any message.
if [ $# -lt 2 ]
then
  echo "usage : $0 <audiofile> <wavfile>"
  exit 1
fi

AUDIO_FILE=$1
WAV_FILE=$2
SAMPLE_RATE=16000
BUFFER_SIZE=4000

# ARGS_8: 8 bit 8KHz
#
# ffmpeg -loglevel panic -i "$AUDIO_FILE" -ac 1 -acodec pcm_u8 -ar 8000 "$WAV_FILE" -y

# ARGS_16: 16 bit 16KHz
#
#ffmpeg -loglevel panic -i "$AUDIO_FILE" -ac 1 -ar 16000 "$WAV_FILE" -y
# File names are quoted so that paths containing spaces or glob characters
# reach ffmpeg as single arguments.
ffmpeg -loglevel panic -i "$AUDIO_FILE" -ac 1 -acodec pcm_s16le -ar "$SAMPLE_RATE" -bufsize "$BUFFER_SIZE" "$WAV_FILE" -y
Oops, something went wrong.