Merge pull request #134 from fractalego/pronouncing-numbers

Pronouncing numbers
fractalego · Nov 1, 2024 · 60aaec1
2 parents 06b9073 + 8aba6e4
commit 60aaec1
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 10 deletions.
diff --git a/tests/config.json b/tests/config.json
@@ -10,10 +10,13 @@
   "max_recursion": 2,
   "frontend_port": 8090,
   "backend": {
-    "host": "aragorn",
+    "host": "localhost",
     "port": 8080,
     "token": "secret"
   },
+  "generation_config": {
+    "temperature": 0.4
+  },
   "listener_model": {
     "listener_hotword_logp": -8,
     "listener_volume_threshold": 0.6,

diff --git a/tests/test_speaker.py b/tests/test_speaker.py
@@ -4,7 +4,7 @@
 from unittest import TestCase
 
 from wafl.config import Configuration
-from wafl.speaker.fairseq_speaker import FairSeqSpeaker
+from wafl.speaker.tts_speaker import TTSSpeaker
 from wafl.speaker.soundfile_speaker import SoundFileSpeaker
 
 _wafl_greetings = """
@@ -17,24 +17,30 @@
 class TestSpeaker(TestCase):
     def test_voice(self):
         config = Configuration.load_local_config()
-        speaker = FairSeqSpeaker(config)
+        speaker = TTSSpeaker(config)
         text = "Hello world"
         asyncio.run(speaker.speak(text))
 
     def test_long_text(self):
         config = Configuration.load_local_config()
-        speaker = FairSeqSpeaker(config)
+        speaker = TTSSpeaker(config)
         text = (
             "Shall I compare you to a summer's day? Thou art more lovely and temperate."
         )
         asyncio.run(speaker.speak(text))
 
-    def test_number_pronunciation(self):
+    def test_number_pronunciation1(self):
         config = Configuration.load_local_config()
-        speaker = FairSeqSpeaker(config)
+        speaker = TTSSpeaker(config)
         text = "The time is 54 past 8"
         asyncio.run(speaker.speak(text))
 
+    def test_number_pronunciation2(self):
+        config = Configuration.load_local_config()
+        speaker = TTSSpeaker(config)
+        text = "The time is 8 54"
+        asyncio.run(speaker.speak(text))
+
     def test_on_sound(self):
         speaker = SoundFileSpeaker()
         speaker.speak(os.path.join(_path, "../wafl/sounds/activation.wav"))

diff --git a/wafl/interface/voice_interface.py b/wafl/interface/voice_interface.py
@@ -1,4 +1,3 @@
-import asyncio
 import os
 import random
 import re
@@ -7,7 +6,7 @@
 from wafl.interface.base_interface import BaseInterface
 from wafl.interface.utils import not_good_enough
 from wafl.listener.whisper_listener import WhisperListener
-from wafl.speaker.fairseq_speaker import FairSeqSpeaker
+from wafl.speaker.tts_speaker import TTSSpeaker
 from wafl.speaker.soundfile_speaker import SoundFileSpeaker
 
 _path = os.path.dirname(__file__)
@@ -27,7 +26,7 @@ def __init__(self, config):
         self._deactivation_sound_filename = self.__get_deactivation_sound_from_config(
             config
         )
-        self._speaker = FairSeqSpeaker(config)
+        self._speaker = TTSSpeaker(config)
         self._listener = WhisperListener(config)
         self._listener.set_timeout(
             config.get_value("listener_model")["listener_silence_timeout"]

diff --git a/wafl/speaker/fairseq_speaker.py b/wafl/speaker/tts_speaker.py
@@ -8,7 +8,7 @@
 from wafl.speaker.utils import convert_numbers_to_words
 
 
-class FairSeqSpeaker(BaseSpeaker):
+class TTSSpeaker(BaseSpeaker):
     def __init__(self, config):
         self._connector = SpeakerConnectorFactory.get_connector(config)
         self._p = pyaudio.PyAudio()