CorentinJ · Sep 3, 2020 · Sep 1, 2020 · Sep 3, 2020 · Sep 3, 2020 · Sep 3, 2020
diff --git a/demo_cli.py b/demo_cli.py
@@ -11,7 +11,7 @@
 import argparse
 import torch
 import sys
-
+from audioread.exceptions import NoBackendError
 
 if __name__ == '__main__':
     ## Info & args
@@ -34,12 +34,22 @@
         "If True, audio won't be played.")
     parser.add_argument("--seed", type=int, default=None, help=\
         "Optional random number seed value to make toolbox deterministic.")
+    parser.add_argument("--no_mp3_support", action="store_true", help=\
+        "If True, no mp3 files are allowed.")
     args = parser.parse_args()
     print_args(args, parser)
     if not args.no_sound:
         import sounddevice as sd
+
+    if not args.no_mp3_support:  # Probe mp3 decoding once at startup so a missing backend fails early
+        try:
+            librosa.load("sample/sample_MP3.mp3")  # Forward slashes resolve on Windows, Linux and macOS
+        except NoBackendError:
+            print("NoBackendError raised: mp3 files cannot be decoded. Please install ffmpeg or rerun with --no_mp3_support.")
+            sys.exit(-1)  # sys.exit is always available; the bare exit() helper comes from the site module
 
     print("Running a test of your configuration...\n")
+
     if torch.cuda.is_available():
         device_id = torch.cuda.current_device()
         gpu_properties = torch.cuda.get_device_properties(device_id)
@@ -123,8 +133,10 @@
             message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, " \
                       "wav, m4a, flac, ...):\n"
             in_fpath = Path(input(message).replace("\"", "").replace("\'", ""))
-
-
+
+            while in_fpath.suffix.lower() == ".mp3" and args.no_mp3_support:  # Reject mp3 only when support is disabled; keep asking until another format is given
+                print("Can't Use mp3 files please try again:")
+                in_fpath = Path(input(message).replace("\"", "").replace("\'", ""))
             ## Computing the embedding
             # First, we load the wav using the function that the speaker encoder provides. This is 
             # important: there is preprocessing that must be applied.

diff --git a/demo_toolbox.py b/demo_toolbox.py
@@ -28,6 +28,8 @@
         "overhead but allows to save some GPU memory for lower-end GPUs.")
     parser.add_argument("--seed", type=int, default=None, help=\
         "Optional random number seed value to make toolbox deterministic.")
+    parser.add_argument("--no_mp3_support", action="store_true", help=\
+        "If True, no mp3 files are allowed.")
     args = parser.parse_args()
     print_args(args, parser)
 

diff --git a/sample/sample_MP3.mp3 b/sample/sample_MP3.mp3
diff --git a/toolbox/__init__.py b/toolbox/__init__.py
@@ -9,7 +9,8 @@
 import traceback
 import sys
 import torch
-
+import librosa
+from audioread.exceptions import NoBackendError
 
 # Use this directory structure for your datasets, or modify it to fit your needs
 recognized_datasets = [
@@ -39,7 +40,14 @@
 MAX_WAVES = 15
 
 class Toolbox:
-    def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir, low_mem, seed):
+    def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir, low_mem, seed, no_mp3_support):
+        self.no_mp3_support = no_mp3_support  # Remembered on the instance so file loading can honour the flag
+        if not no_mp3_support:  # Probe mp3 decoding once so a missing backend fails at startup
+            try:
+                librosa.load("sample/sample_MP3.mp3")  # Forward slashes resolve on Windows, Linux and macOS
+            except NoBackendError:
+                print("NoBackendError raised: mp3 files cannot be decoded. Please install ffmpeg or rerun with --no_mp3_support.")
+                sys.exit(-1)  # sys.exit is always available; the bare exit() helper comes from the site module
         sys.excepthook = self.excepthook
         self.datasets_root = datasets_root
         self.low_mem = low_mem
@@ -64,7 +72,7 @@ def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir
         self.reset_ui(enc_models_dir, syn_models_dir, voc_models_dir, seed)
         self.setup_events()
         self.ui.start()
-        
+
     def excepthook(self, exc_type, exc_value, exc_tb):
         traceback.print_exception(exc_type, exc_value, exc_tb)
         self.ui.log("Exception: %s" % exc_value)
@@ -149,7 +157,11 @@ def load_from_browser(self, fpath=None):
         else:
             name = fpath.name
             speaker_name = fpath.parent.name
-
+
+        if str(fpath).lower().endswith(".mp3") and getattr(self, "no_mp3_support", True):  # Reject mp3 only when support is off (rejects, as before, if the flag is absent)
+            self.ui.log("Error: No mp3 file argument was passed but an mp3 file was used")
+            return  # Skip this file instead of terminating the whole toolbox
+
         # Get the wav from the disk. We take the wav with the vocoder/synthesizer format for
         # playback, so as to have a fair comparison with the generated audio
         wav = Synthesizer.load_preprocess_wav(fpath)