fix incompatible model shape

myshell-ai · Mar 29, 2024 · 5d7b264 · 5d7b264
1 parent ba1a846
commit 5d7b264
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 2 deletions.
diff --git a/MeloTTS/melo/api.py b/MeloTTS/melo/api.py
@@ -39,14 +39,17 @@ def __init__(
         num_tones = hps.num_tones
         symbols = hps.symbols
 
+        args = dict(hps.model)
+        args.pop("num_languages", None)
+
         model = SynthesizerTrn(
             len(symbols),
             hps.data.filter_length // 2 + 1,
             hps.train.segment_size // hps.data.hop_length,
             n_speakers=hps.data.n_speakers,
             num_tones=num_tones,
             num_languages=num_languages,
-            **hps.model,
+            **args,
         ).to(device)
 
         model.eval()

diff --git a/MeloTTS/melo/preprocess_text.py b/MeloTTS/melo/preprocess_text.py
@@ -122,7 +122,11 @@ def main(
 
     config["data"]["training_files"] = train_path
     config["data"]["validation_files"] = val_path
-    config["data"]["n_speakers"] = len(spk_id_map)
+
+    # Hardcoded for compatibility with the pretrained model
+    config["data"]["n_speakers"] = 256# len(spk_id_map)
+    config["model"]["num_languages"] = 10
+
     config["num_languages"] = num_languages
     config["num_tones"] = num_tones
     config["symbols"] = symbols

diff --git a/docs/miner.md b/docs/miner.md
@@ -33,6 +33,8 @@ where the `config.json` will be generated in the same folder as the `metadata.li
 
 In addition, you can add the `--pretrain_G path_to_your_pretrained_model_pth_file` option to load a pretrained model. For example, the official MeloTTS model can be downloaded from the [Huggingface model hub](https://huggingface.co/myshell-ai/MeloTTS-English).
 
+> Warning: There is currently an incompatibility between the pretrained model and the default model configuration. It has been fixed in the newest version, but if you are using an older version of this package, please edit the `config.json` file in the same folder as the `metadata.list` file: change `n_speakers` in the `data` section from 1 to 256, and add the line `"num_languages": 10` to the `model` section. An example of a working configuration file is provided in `docs/config.json`. Sorry for the inconvenience.
+
 ### 4. Listen to the generated audio
 After training, you can run the following command to generate audio from the text: