From 854b00c0fb0739e33c81ee919409f863b430d635 Mon Sep 17 00:00:00 2001 From: terepan Date: Tue, 9 Jan 2024 17:59:07 +0000 Subject: [PATCH 1/6] =?UTF-8?q?add:=20`xx`=20OJT=20=E9=9F=B3=E7=B4=A0?= =?UTF-8?q?=E3=81=AE=E3=83=90=E3=83=AA=E3=83=87=E3=83=BC=E3=82=B7=E3=83=A7?= =?UTF-8?q?=E3=83=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/tts_pipeline/test_text_analyzer.py | 33 ++++++++----------- test/tts_pipeline/test_tts_engine.py | 17 ---------- voicevox_engine/model.py | 5 +++ voicevox_engine/tts_pipeline/text_analyzer.py | 12 ++++--- 4 files changed, 27 insertions(+), 40 deletions(-) diff --git a/test/tts_pipeline/test_text_analyzer.py b/test/tts_pipeline/test_text_analyzer.py index ebaf30977..240d95663 100644 --- a/test/tts_pipeline/test_text_analyzer.py +++ b/test/tts_pipeline/test_text_analyzer.py @@ -1,6 +1,8 @@ from unittest import TestCase -from voicevox_engine.model import AccentPhrase, Mora +import pytest + +from voicevox_engine.model import AccentPhrase, Mora, UnknownOjtPhonemeError from voicevox_engine.tts_pipeline.text_analyzer import ( AccentPhraseLabel, BreathGroupLabel, @@ -402,22 +404,15 @@ def stub_unknown_features_koxx(_: str) -> list[str]: ] +def test_label_unknown_phoneme(): + """`Label` は unknown 音素 `xx` をパース失敗する""" + unknown_feature = stub_unknown_features_koxx("dummy")[3] + with pytest.raises(UnknownOjtPhonemeError): + unknown_label = Label.from_feature(unknown_feature) + unknown_label.phoneme + + def test_text_to_accent_phrases_unknown(): - """`text_to_accent_phrases` は unknown 音素を含む features をパースする""" - # Expects - true_accent_phrases = [ - AccentPhrase( - moras=[ - _gen_mora("コ", "k", "o"), - _gen_mora("xx", None, "xx"), - ], - accent=1, - pause_mora=None, - ), - ] - # Outputs - accent_phrases = text_to_accent_phrases( - "dummy", text_to_features=stub_unknown_features_koxx - ) - # Tests - assert accent_phrases == true_accent_phrases + """`text_to_accent_phrases` は unknown 音素を含む features をパース失敗する""" + with pytest.raises(UnknownOjtPhonemeError): + text_to_accent_phrases("dummy", text_to_features=stub_unknown_features_koxx) diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py index e7417792a..ec955547c 100644 --- a/test/tts_pipeline/test_tts_engine.py +++ b/test/tts_pipeline/test_tts_engine.py @@ -3,7 +3,6 @@ from unittest.mock import Mock import numpy as np -import pytest from numpy.typing import NDArray from syrupy.extensions.json import JSONSnapshotExtension @@ -14,7 +13,6 @@ UNVOICED_MORA_TAIL_PHONEMES, Phoneme, ) -from voicevox_engine.tts_pipeline.text_analyzer import text_to_accent_phrases from voicevox_engine.tts_pipeline.tts_engine import ( TTSEngine, apply_interrogative_upspeak, @@ -22,8 +20,6 @@ to_flatten_phonemes, ) -from .test_text_analyzer import stub_unknown_features_koxx - def yukarin_s_mock( length: int, phoneme_list: NDArray[np.int64], style_id: NDArray[np.int64] @@ -309,19 +305,6 @@ def result_value(i: int) -> float: self.assertEqual(result, true_result) -def test_create_accent_phrases_toward_unknown(): - """`TTSEngine.create_accent_phrases()` は unknown 音素の Phoneme 化に失敗する""" - engine = TTSEngine(MockCoreWrapper()) - - # NOTE: TTSEngine.create_accent_phrases() のコールで unknown feature を得ることが難しいため、疑似再現 - accent_phrases = text_to_accent_phrases( - "dummy", text_to_features=stub_unknown_features_koxx - ) - with pytest.raises(ValueError) as e: - accent_phrases = engine.update_length_and_pitch(accent_phrases, StyleId(0)) - assert str(e.value) == "tuple.index(x): x not in tuple" - - def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> None: # Inputs tts_engine = TTSEngine(MockCoreWrapper()) diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py index cd92cab4f..839b0fefa 100644 --- a/voicevox_engine/model.py +++ b/voicevox_engine/model.py @@ -109,6 +109,11 @@ def __init__(self, err: ParseKanaError): super().__init__(text=err.text, error_name=err.errname, error_args=err.kwargs) +class UnknownOjtPhonemeError(Exception): + def __init__(self, **kwargs: Any) -> None: + self.text = "OpenJTalk の unknown 音素 `xx` は非対応です。" + + class MorphableTargetInfo(BaseModel): is_morphable: bool = Field(title="指定した話者に対してモーフィングの可否") # FIXME: add reason property diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py index 37fce007b..bb6329834 100644 --- a/voicevox_engine/tts_pipeline/text_analyzer.py +++ b/voicevox_engine/tts_pipeline/text_analyzer.py @@ -5,7 +5,8 @@ import pyopenjtalk -from ..model import AccentPhrase, Mora +from ..model import AccentPhrase, Mora, UnknownOjtPhonemeError +from .acoustic_feature_extractor import Consonant, Vowel from .mora_list import mora_phonemes_to_mora_kana OjtVowel = Literal[ @@ -82,10 +83,13 @@ def from_feature(cls, feature: str) -> Self: return cls(contexts=contexts) @property - def phoneme(self) -> OjtPhoneme: + def phoneme(self) -> Vowel | Consonant | Literal["sil"]: """このラベルに含まれる音素。子音 or 母音 (無音含む)。""" - # FIXME: バリデーションする - return self.contexts["p3"] # type: ignore + p: OjtPhoneme = self.contexts["p3"] # type: ignore + if p == "xx": + raise UnknownOjtPhonemeError() + else: + return p @property def mora_index(self) -> int: From 8d90203d50099145fa5592d79f18547a8b717bbd Mon Sep 17 00:00:00 2001 From: terepan Date: Tue, 9 Jan 2024 18:24:51 +0000 Subject: [PATCH 2/6] =?UTF-8?q?add:=20OJT=E9=9F=B3=E7=B4=A0=E3=83=90?= =?UTF-8?q?=E3=83=AA=E3=83=87=E3=83=BC=E3=82=B7=E3=83=A7=E3=83=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/tts_pipeline/test_text_analyzer.py | 6 +- voicevox_engine/model.py | 7 ++- voicevox_engine/tts_pipeline/text_analyzer.py | 62 +++++++++++++++++-- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/test/tts_pipeline/test_text_analyzer.py b/test/tts_pipeline/test_text_analyzer.py index 240d95663..4c475ae44 100644 --- a/test/tts_pipeline/test_text_analyzer.py +++ b/test/tts_pipeline/test_text_analyzer.py @@ -2,7 +2,7 @@ import pytest -from voicevox_engine.model import AccentPhrase, Mora, UnknownOjtPhonemeError +from voicevox_engine.model import AccentPhrase, Mora, OjtUnknownPhonemeError from voicevox_engine.tts_pipeline.text_analyzer import ( AccentPhraseLabel, BreathGroupLabel, @@ -407,12 +407,12 @@ def stub_unknown_features_koxx(_: str) -> list[str]: def test_label_unknown_phoneme(): """`Label` は unknown 音素 `xx` をパース失敗する""" unknown_feature = stub_unknown_features_koxx("dummy")[3] - with pytest.raises(UnknownOjtPhonemeError): + with pytest.raises(OjtUnknownPhonemeError): unknown_label = Label.from_feature(unknown_feature) unknown_label.phoneme def test_text_to_accent_phrases_unknown(): """`text_to_accent_phrases` は unknown 音素を含む features をパース失敗する""" - with pytest.raises(UnknownOjtPhonemeError): + with pytest.raises(OjtUnknownPhonemeError): text_to_accent_phrases("dummy", text_to_features=stub_unknown_features_koxx) diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py index 839b0fefa..a08c7af35 100644 --- a/voicevox_engine/model.py +++ b/voicevox_engine/model.py @@ -109,7 +109,12 @@ def __init__(self, err: ParseKanaError): super().__init__(text=err.text, error_name=err.errname, error_args=err.kwargs) -class UnknownOjtPhonemeError(Exception): +class NonOjtPhonemeError(Exception): + def __init__(self, **kwargs: Any) -> None: + self.text = "OpenJTalk で想定されていない音素が生成されたため処理できません。" + + +class OjtUnknownPhonemeError(Exception): def __init__(self, **kwargs: Any) -> None: self.text = "OpenJTalk の unknown 音素 `xx` は非対応です。" diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py index bb6329834..446490077 100644 --- a/voicevox_engine/tts_pipeline/text_analyzer.py +++ b/voicevox_engine/tts_pipeline/text_analyzer.py @@ -5,7 +5,7 @@ import pyopenjtalk -from ..model import AccentPhrase, Mora, UnknownOjtPhonemeError +from ..model import AccentPhrase, Mora, NonOjtPhonemeError, OjtUnknownPhonemeError from .acoustic_feature_extractor import Consonant, Vowel from .mora_list import mora_phonemes_to_mora_kana @@ -48,6 +48,55 @@ ] OjtUnknown = Literal["xx"] OjtPhoneme = OjtVowel | OjtConsonant | OjtUnknown +_OJT_PHONEMES: list[OjtPhoneme] = [ + "A", + "E", + "I", + "N", + "O", + "U", + "a", + "cl", + "e", + "i", + "o", + "pau", + "sil", + "u", + "b", + "by", + "ch", + "d", + "dy", + "f", + "g", + "gw", + "gy", + "h", + "hy", + "j", + "k", + "kw", + "ky", + "m", + "my", + "n", + "ny", + "p", + "py", + "r", + "ry", + "s", + "sh", + "t", + "ts", + "ty", + "v", + "w", + "y", + "z", + "xx", +] @dataclass @@ -85,11 +134,14 @@ def from_feature(cls, feature: str) -> Self: @property def phoneme(self) -> Vowel | Consonant | Literal["sil"]: """このラベルに含まれる音素。子音 or 母音 (無音含む)。""" - p: OjtPhoneme = self.contexts["p3"] # type: ignore - if p == "xx": - raise UnknownOjtPhonemeError() + p = self.contexts["p3"] + if p not in _OJT_PHONEMES: + raise NonOjtPhonemeError() + elif p == "xx": + raise OjtUnknownPhonemeError() else: - return p + # NOTE: mypy が型推論に失敗。pyright の推論した型が返り値型と一致することをマニュアル確認済み @2024-01-10 tarepan + return p # type: ignore @property def mora_index(self) -> int: From 0e2189c670adbac741db4efaf02627f885e55027 Mon Sep 17 00:00:00 2001 From: terepan Date: Tue, 9 Jan 2024 18:28:06 +0000 Subject: [PATCH 3/6] =?UTF-8?q?add:=20NonOJT=E9=9F=B3=E7=B4=A0=E7=95=B0?= =?UTF-8?q?=E5=B8=B8=E7=B3=BB=E3=83=86=E3=82=B9=E3=83=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/tts_pipeline/test_text_analyzer.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/tts_pipeline/test_text_analyzer.py b/test/tts_pipeline/test_text_analyzer.py index 4c475ae44..f7bc95aa0 100644 --- a/test/tts_pipeline/test_text_analyzer.py +++ b/test/tts_pipeline/test_text_analyzer.py @@ -2,7 +2,12 @@ import pytest -from voicevox_engine.model import AccentPhrase, Mora, OjtUnknownPhonemeError +from voicevox_engine.model import ( + AccentPhrase, + Mora, + NonOjtPhonemeError, + OjtUnknownPhonemeError, +) from voicevox_engine.tts_pipeline.text_analyzer import ( AccentPhraseLabel, BreathGroupLabel, @@ -404,6 +409,14 @@ def stub_unknown_features_koxx(_: str) -> list[str]: ] +def test_label_non_ojt_phoneme(): + """`Label` は OJT で想定されない音素をパース失敗する""" + non_ojt_feature = ".^.-G+.=./A:.+2+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-." # noqa: B950 + with pytest.raises(NonOjtPhonemeError): + unknown_label = Label.from_feature(non_ojt_feature) + unknown_label.phoneme + + def test_label_unknown_phoneme(): """`Label` は unknown 音素 `xx` をパース失敗する""" unknown_feature = stub_unknown_features_koxx("dummy")[3] From c8d34a65d8795b1fdfe8f93284d44bdd9260df3e Mon Sep 17 00:00:00 2001 From: terepan Date: Wed, 17 Jan 2024 05:47:58 +0000 Subject: [PATCH 4/6] =?UTF-8?q?refactor:=20feature=20=E6=96=87=E5=AD=97?= =?UTF-8?q?=E5=88=97=E3=81=B8=E3=81=AE=E5=A4=89=E6=95=B0=E4=BB=A3=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/tts_pipeline/test_text_analyzer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/tts_pipeline/test_text_analyzer.py b/test/tts_pipeline/test_text_analyzer.py index f7bc95aa0..0e29185ec 100644 --- a/test/tts_pipeline/test_text_analyzer.py +++ b/test/tts_pipeline/test_text_analyzer.py @@ -411,7 +411,8 @@ def stub_unknown_features_koxx(_: str) -> list[str]: def test_label_non_ojt_phoneme(): """`Label` は OJT で想定されない音素をパース失敗する""" - non_ojt_feature = ".^.-G+.=./A:.+2+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-." # noqa: B950 + non_ojt_phoneme = "G" + non_ojt_feature = f".^.-{non_ojt_phoneme}+.=./A:.+2+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-." # noqa: B950 with pytest.raises(NonOjtPhonemeError): unknown_label = Label.from_feature(non_ojt_feature) unknown_label.phoneme From 9a137a2056007f66fe3f7f2587a4271fa0f0de5c Mon Sep 17 00:00:00 2001 From: terepan Date: Wed, 17 Jan 2024 06:10:50 +0000 Subject: [PATCH 5/6] =?UTF-8?q?refactor:=20OJT=E9=9F=B3=E7=B4=A0=E5=9E=8B?= =?UTF-8?q?=E5=BB=83=E6=AD=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- voicevox_engine/tts_pipeline/text_analyzer.py | 49 +++---------------- 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py index 446490077..d4b4da6a4 100644 --- a/voicevox_engine/tts_pipeline/text_analyzer.py +++ b/voicevox_engine/tts_pipeline/text_analyzer.py @@ -9,46 +9,8 @@ from .acoustic_feature_extractor import Consonant, Vowel from .mora_list import mora_phonemes_to_mora_kana -OjtVowel = Literal[ - "A", "E", "I", "N", "O", "U", "a", "cl", "e", "i", "o", "pau", "sil", "u" -] -OjtConsonant = Literal[ - "b", - "by", - "ch", - "d", - "dy", - "f", - "g", - "gw", - "gy", - "h", - "hy", - "j", - "k", - "kw", - "ky", - "m", - "my", - "n", - "ny", - "p", - "py", - "r", - "ry", - "s", - "sh", - "t", - "ts", - "ty", - "v", - "w", - "y", - "z", -] -OjtUnknown = Literal["xx"] -OjtPhoneme = OjtVowel | OjtConsonant | OjtUnknown -_OJT_PHONEMES: list[OjtPhoneme] = [ +# OpenJTalk が出力する音素の一覧。 +_OJT_VOWELS = ( "A", "E", "I", @@ -63,6 +25,8 @@ "pau", "sil", "u", +) +_OJT_CONSONANTS = ( "b", "by", "ch", @@ -95,8 +59,9 @@ "w", "y", "z", - "xx", -] +) +_OJT_UNKNOWNS = ("xx",) +_OJT_PHONEMES = _OJT_VOWELS + _OJT_CONSONANTS + _OJT_UNKNOWNS @dataclass From af81868206375c0e4f25c2d9bc528dd4cd202ae0 Mon Sep 17 00:00:00 2001 From: tarepan Date: Fri, 23 Feb 2024 13:05:39 +0000 Subject: [PATCH 6/6] fix: lint --- voicevox_engine/tts_pipeline/text_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py index 00dcb0779..8c6aace4d 100644 --- a/voicevox_engine/tts_pipeline/text_analyzer.py +++ b/voicevox_engine/tts_pipeline/text_analyzer.py @@ -6,8 +6,8 @@ import pyopenjtalk from ..model import AccentPhrase, Mora, NonOjtPhonemeError, OjtUnknownPhonemeError -from .phoneme import Consonant, Vowel from .mora_mapping import mora_phonemes_to_mora_kana +from .phoneme import Consonant, Vowel # OpenJTalk が出力する音素の一覧。 _OJT_VOWELS = (