v. 0.3.9

solyarisoftware · May 11, 2021 · c6ce0dd · c6ce0dd
1 parent 92ff77a
commit c6ce0dd
Show file tree

Hide file tree

Showing 12 changed files with 389 additions and 62 deletions.
diff --git a/README.md b/README.md
@@ -49,23 +49,18 @@ The goal of the project is to:
 
 ## Install 
 
-### 1. Install Vosk engine and relative nodejs modules
+### 1. Install Vosk engine and this nodejs module 
 
-```bash
-# 1. install vosk-api engine
-pip3 install vosk 
-
-# 2. install vosk-api nodejs binding module
-npm install vosk
-
-# 3. install this module as local package 
-npm install @solyarisoftware/voskjs
+- Install vosk-api engine
+  ```bash
+  pip3 install vosk 
+  ```
+  See also: https://alphacephei.com/vosk/install
 
-# 3. or install this module as global package, to use CLI command voskjs 
-npm install -g @solyarisoftware/voskjs
-```
-
-See also: https://alphacephei.com/vosk/install
+- Install this module (as a global package, if you want to use the `voskjs` CLI command)
+  ```bash
+  npm install -g @solyarisoftware/voskjs
+  ```
 
 
 ### 2. Install/Download Vosk models
@@ -84,8 +79,6 @@ unzip vosk-model-small-en-us-0.15.zip
 # Italian model
 wget https://alphacephei.com/vosk/models/vosk-model-small-it-0.4.zip
 unzip vosk-model-small-it-0.4.zip
-
-cd ..
 ```
 
 More about available Vosk models here: https://alphacephei.com/vosk/models
@@ -98,7 +91,7 @@ Source: [Mozilla DeepSpeech audio samples](https://github.com/mozilla/DeepSpeech
 These files are used for some tests and comparisons.
 
 
-## Examples 
+## Usage 
 
 Some transcript usage examples [here](examples) 
 

diff --git a/audio/EN_CSQU3054383.wav b/audio/EN_CSQU3054383.wav
diff --git a/audio/IT_CSQU3054383.wav b/audio/IT_CSQU3054383.wav
diff --git a/audio/IT_CSQU3054383_long.wav b/audio/IT_CSQU3054383_long.wav
diff --git a/audio/IT_RAIU_690011_4_25_U1.wav b/audio/IT_RAIU_690011_4_25_U1.wav
diff --git a/examples/grammar_iso6346.js b/examples/grammar_iso6346.js
@@ -0,0 +1,62 @@
+const { logLevel, loadModel, transcript, freeModel } = require('../voskjs')
+
+const { spellingEnglishCharacters } = require('./spellingEnglishCharacters')
+const { spellingItalianCharacters } = require('./spellingEnglishCharacters')
+
+
+/**
+ * @see https://alphacephei.com/vosk/adaptation
+ */ 
+async function main() {
+
+  //const grammar = undefined
+  const grammar = spellingEnglishCharacters
+  //const grammar = spellingItalianCharacters
+
+  // English language dynamic graph small model
+  const modelDirectory = '../models/vosk-model-small-en-us-0.15'
+  //const modelDirectory = '../models/vosk-model-en-us-aspire-0.2'
+
+  // Italian language dynamic graph small model
+  //const modelDirectory = '../models/vosk-model-small-it-0.4'
+
+  //const audioFile = '../audio/2830-3980-0043.wav' // -> experience proves this
+  //const audioFile = '../audio/4507-16021-0012.wav' // -> why should one hold on the way
+  //const audioFile = '../audio/8455-210777-0068.wav' // -> your power is sufficient i said
+
+  // English language ISO6346 samples
+  const audioFile = '../audio/EN_CSQU3054383.wav' // -> charlie for c ...
+
+  // Italian language ISO6346 samples
+  //const audioFile = '../audio/IT_CSQU3054383.wav'
+  //const audioFile = '../audio/IT_CSQU3054383_long.wav'
+  //const audioFile = '../audio/IT_RAIU_690011_4_25_U1.wav'
+
+  console.log(`model directory      : ${modelDirectory}`)
+  console.log(`speech file name     : ${audioFile}`)
+  console.log(`grammar              : ${grammar}`)
+
+  // set the vosk log level to silence 
+  logLevel(-1) 
+
+  // load in memory a Vosk directory model
+  const { model, latency } = await loadModel(modelDirectory)
+
+  console.log(`load model latency   : ${latency}ms`)
+
+  // speech recognition of an audio file
+  try {
+    const { result, latency } = await transcript(audioFile, model, {grammar})
+
+    console.log( result )
+    console.log(`transcript latency : ${latency}ms`)
+  }  
+  catch (error) {
+    console.error(error) 
+  }  
+
+  // free the Vosk runtime model
+  freeModel(model)
+}
+
+main()
diff --git a/examples/spellingEnglishCharacters.js b/examples/spellingEnglishCharacters.js
@@ -0,0 +1,102 @@
+const spellingEnglishCharacters = [
+  // numbers
+  'zero',
+  'one',
+  'two',
+  'three',
+  'four',
+  'five',
+  'six',
+  'seven',
+  'eight',
+  'nine',
+
+  // letters  
+  'alfa for a',
+  'bravo for b',
+  'charlie for c',
+  'delta for d',
+  'echo for e',
+  'foxtrot for f',
+  'golf for g',
+  'hotel for h',
+  'india for i',
+  'juliet for j',
+  'kilo for k',
+  'lima for l',
+  'mike for m',
+  'november for n',
+  'oscar for o',
+  'papa for p',
+  'quebec for q',
+  'romeo for r',
+  'sierra for s',
+  'tango for t',
+  'uniform for u',
+  'victor for v',
+  'whiskey for w',
+  'x ray for x',
+  'yankee for y',
+  'zulu for z'
+
+  /*  
+  // symbols  
+  'space',
+  'tab',
+  'point',
+  'comma',
+  'semicolon',
+  'colon',
+  'exclamation mark',
+  'question mark',
+  'tick',
+  'backtick',
+  'quotation mark',
+  'apostrophe',
+  'acute accent',
+  'grave accent',
+  'closing_inclined_quotes',
+  'opening_inclined_quotes',
+  'opening double quotes',
+  'closing double quotes',
+  'opening round bracket',
+  'closing round bracket',
+  'opening square bracket',
+  'closing square bracket',
+  'opening curly bracket',
+  'closing curly bracket',
+  'at sign',
+  'asterisk_symbol',
+  'hash',
+  'percent sign',
+  'vertical_bar',
+  'slash',
+  'backslash',
+  'lira sign',
+  'dollar sign',
+  'ampersand',
+  'caret',
+  'equal sign',
+  'dash',
+  'plus sign',
+  'grater-than sign',
+  'less-then sign',
+  'tilde',
+  'underscore',
+  'penny sign',
+  'copyright sign',
+  'division sign',
+  'micron sign',
+  'paragraph delimiter',
+  'more or less sign',
+  'trademark symbol',
+  'section delimiter',
+  'trademark sign',
+  'japanese yen sign',
+  'inverted question mark',
+  'inverted exclamation mark'
+  */
+]
+
+module.exports = { spellingEnglishCharacters }
+
diff --git a/examples/spellingItalianCharacters.js b/examples/spellingItalianCharacters.js
@@ -0,0 +1,111 @@
+const spellingItalianCharacters = [
+  // numbers
+  'zero',
+  'uno',
+  'due',
+  'tre',
+  'quattro',
+  'cinque',
+  'sei',
+  'sette',
+  'otto',
+  'nove',
+
+  // letters
+  'a come Ancona',
+  'bi come Bologna',
+  'ci come Cagliari',
+  'di come Domodossola',
+  'é come Empoli',
+  'èffè come Firenze',
+  'gi come Genova',
+  'àcca come Hotel',
+  'i come Imperia',
+  'i lùnga come Jolly',
+  'càppa come kursaal',
+  'èllè come Livorno',
+  'èmmè come Milano',
+  'ènnè come Napoli',
+  'ò come Otranto',
+  'pi come Palermo',
+  'cu come Quarto',
+  'èrrè come Roma',
+  'èssè come Savona',
+  'ti come Torino',
+  'u come Udine',
+  'vu come Venezia',
+  'vu dóppia come Washington',
+  'ics come Xilofono',
+  'ìpsilon',
+  'zèta come Zara',
+  'á con accento acuto',
+  'à con accento grave',
+  'é con accento acuto',
+  'è con accento grave',
+  'í con accento acuto',
+  'ì con accento grave',
+  'ó con accento acuto',
+  'ò con accento grave',
+  'ú con accento acuto',
+  'ù con accento grave'
+  /*
+  'spazio',
+  'tabulazione',
+  'punto',
+  'punto centrale',
+  'virgola',
+  'punto e virgola',
+  'due punti',
+  'punto esclamativo',
+  'punto interrogativo',
+  'virgoletta destra inclinata',
+  'virgolette',
+  'apostrofo',
+  'accento acuto',
+  'accento grave',
+  'virgolette destre inclinate',
+  'virgolette sinistre inclinate',
+  'virgolette doppie aperte',
+  'virgolette doppie chiuse',
+  'parentesi tonda aperta',
+  'parentesi tonda chiusa',
+  'parentesi quadra aperta',
+  'parentesi quadra chiusa',
+  'parentesi graffa aperta',
+  'parentesi graffa chiusa',
+  'chiocciola',
+  'simbolo asterisco',
+  'simbolo cancelletto',
+  'simbolo percento',
+  'barra verticale',
+  'barra',
+  'barra retroversa',
+  'simbolo valuta lira',
+  'simbolo valuta dollaro',
+  'simbolo e commerciale',
+  'simbolo cappelletto',
+  'simbolo uguale',
+  'trattino',
+  'simbolo più',
+  'simbolo maggiore',
+  'simbolo minore',
+  'tilde',
+  'trattino lungo',
+  'sottolineato',
+  'simbolo di centesimo',
+  'simbolo di copyright',
+  'simbolo di divisione',
+  'simbolo micron',
+  'delimitatore di paragrafo',
+  'simbolo più o meno',
+  'simbolo di marchio registrato',
+  'delimitatore di sezione',
+  'simbolo trademark',
+  'simbolo valuta Yen Giapponese',
+  'punto di domanda invertito',
+  'punto esclamativo invertito',
+  */
+]
+
+module.exports = { spellingItalianCharacters }
+
diff --git a/scripts/play.sh b/scripts/play.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+#
+# play an audio file
+#
+# ffplay -nodisp -autoexit -hide_banner -loglevel panic audio/mi_chiamo_giorgio.mp3.opus
+# opusdec --force-wav --quiet audio/mi_chiamo_giorgio.mp3.opus - | aplay
+#
+if [ $# -eq 0 ]
+  then
+    echo
+    echo "play an audio file"
+    echo "usage: $0 <audiofile>"
+    echo
+    exit
+fi
+
+ffplay -nodisp -autoexit -hide_banner -loglevel panic "$1"
+
diff --git a/tests/simplerec.sh → scripts/simplerec.sh b/tests/simplerec.sh → scripts/simplerec.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+# sudo apt-get install sox ffmpeg
+
 if [ $# -eq 0 ]
   then
     echo "usage  : $0 filename (without suffix)"

diff --git a/scripts/toWav.sh b/scripts/toWav.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# sudo apt-get install libopus0 opus-tools ffmpeg
+
+AUDIO_FILE=$1
+WAV_FILE=$2
+SAMPLE_RATE=16000
+BUFFER_SIZE=4000
+
+# ARGS_8: 8 bit 8KHz
+#
+# ffmpeg -loglevel panic -i $AUDIO_FILE -ac 1 -acodec pcm_u8 -ar 8000 $WAV_FILE -y
+
+
+# ARGS_16: 16 bit 16KHz
+#
+#ffmpeg -loglevel panic -i $AUDIO_FILE -ac 1 -ar 16000 $WAV_FILE -y
+ffmpeg -loglevel panic -i $AUDIO_FILE -ac 1 -acodec pcm_s16le -ar $SAMPLE_RATE -bufsize $BUFFER_SIZE $WAV_FILE -y