diff --git a/test/tts_pipeline/test_text_analyzer.py b/test/tts_pipeline/test_text_analyzer.py index ebaf30977..0e29185ec 100644 --- a/test/tts_pipeline/test_text_analyzer.py +++ b/test/tts_pipeline/test_text_analyzer.py @@ -1,6 +1,13 @@ from unittest import TestCase -from voicevox_engine.model import AccentPhrase, Mora +import pytest + +from voicevox_engine.model import ( + AccentPhrase, + Mora, + NonOjtPhonemeError, + OjtUnknownPhonemeError, +) from voicevox_engine.tts_pipeline.text_analyzer import ( AccentPhraseLabel, BreathGroupLabel, @@ -402,22 +409,24 @@ def stub_unknown_features_koxx(_: str) -> list[str]: ] +def test_label_non_ojt_phoneme(): + """`Label` は OJT で想定されない音素をパース失敗する""" + non_ojt_phoneme = "G" + non_ojt_feature = f".^.-{non_ojt_phoneme}+.=./A:.+2+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-." # noqa: B950 + with pytest.raises(NonOjtPhonemeError): + unknown_label = Label.from_feature(non_ojt_feature) + unknown_label.phoneme + + +def test_label_unknown_phoneme(): + """`Label` は unknown 音素 `xx` をパース失敗する""" + unknown_feature = stub_unknown_features_koxx("dummy")[3] + with pytest.raises(OjtUnknownPhonemeError): + unknown_label = Label.from_feature(unknown_feature) + unknown_label.phoneme + + def test_text_to_accent_phrases_unknown(): - """`text_to_accent_phrases` は unknown 音素を含む features をパースする""" - # Expects - true_accent_phrases = [ - AccentPhrase( - moras=[ - _gen_mora("コ", "k", "o"), - _gen_mora("xx", None, "xx"), - ], - accent=1, - pause_mora=None, - ), - ] - # Outputs - accent_phrases = text_to_accent_phrases( - "dummy", text_to_features=stub_unknown_features_koxx - ) - # Tests - assert accent_phrases == true_accent_phrases + """`text_to_accent_phrases` は unknown 音素を含む features をパース失敗する""" + with pytest.raises(OjtUnknownPhonemeError): + text_to_accent_phrases("dummy", text_to_features=stub_unknown_features_koxx) diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py index 8d4c322b6..fe36c8640 100644 --- a/test/tts_pipeline/test_tts_engine.py +++ b/test/tts_pipeline/test_tts_engine.py @@ -3,7 +3,6 @@ from unittest.mock import Mock import numpy as np -import pytest from numpy.typing import NDArray from syrupy.assertion import SnapshotAssertion @@ -17,7 +16,6 @@ Note, Score, ) -from voicevox_engine.tts_pipeline.text_analyzer import text_to_accent_phrases from voicevox_engine.tts_pipeline.tts_engine import ( TTSEngine, apply_interrogative_upspeak, @@ -25,8 +23,6 @@ to_flatten_phonemes, ) -from .test_text_analyzer import stub_unknown_features_koxx - def yukarin_s_mock( length: int, phoneme_list: NDArray[np.int64], style_id: NDArray[np.int64] @@ -296,19 +292,6 @@ def test_update_pitch(self): np.testing.assert_array_equal(end_accent_phrase_list, true_phrase_ends) -def test_create_accent_phrases_toward_unknown(): - """`TTSEngine.create_accent_phrases()` は unknown 音素の Phoneme 化に失敗する""" - engine = TTSEngine(MockCoreWrapper()) - - # NOTE: TTSEngine.create_accent_phrases() のコールで unknown feature を得ることが難しいため、疑似再現 - accent_phrases = text_to_accent_phrases( - "dummy", text_to_features=stub_unknown_features_koxx - ) - with pytest.raises(ValueError) as e: - accent_phrases = engine.update_length_and_pitch(accent_phrases, StyleId(0)) - assert str(e.value) == "tuple.index(x): x not in tuple" - - def test_mocked_update_length_output(snapshot_json: SnapshotAssertion) -> None: """モックされた `TTSEngine.update_length()` の出力スナップショットが一定である""" # Inputs diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py index f9e50b62c..348459008 100644 --- a/voicevox_engine/model.py +++ b/voicevox_engine/model.py @@ -149,6 +149,16 @@ def __init__(self, err: ParseKanaError): super().__init__(text=err.text, error_name=err.errname, error_args=err.kwargs) +class NonOjtPhonemeError(Exception): + def __init__(self, **kwargs: Any) -> None: + self.text = "OpenJTalk で想定されていない音素が生成されたため処理できません。" + + +class OjtUnknownPhonemeError(Exception): + def __init__(self, **kwargs: Any) -> None: + self.text = "OpenJTalk の unknown 音素 `xx` は非対応です。" + + class MorphableTargetInfo(BaseModel): is_morphable: bool = Field(title="指定した話者に対してモーフィングの可否") # FIXME: add reason property diff --git a/voicevox_engine/tts_pipeline/text_analyzer.py b/voicevox_engine/tts_pipeline/text_analyzer.py index cbb9736e8..8c6aace4d 100644 --- a/voicevox_engine/tts_pipeline/text_analyzer.py +++ b/voicevox_engine/tts_pipeline/text_analyzer.py @@ -5,13 +5,28 @@ import pyopenjtalk -from ..model import AccentPhrase, Mora +from ..model import AccentPhrase, Mora, NonOjtPhonemeError, OjtUnknownPhonemeError from .mora_mapping import mora_phonemes_to_mora_kana - -OjtVowel = Literal[ - "A", "E", "I", "N", "O", "U", "a", "cl", "e", "i", "o", "pau", "sil", "u" -] -OjtConsonant = Literal[ +from .phoneme import Consonant, Vowel + +# OpenJTalk が出力する音素の一覧。 +_OJT_VOWELS = ( + "A", + "E", + "I", + "N", + "O", + "U", + "a", + "cl", + "e", + "i", + "o", + "pau", + "sil", + "u", +) +_OJT_CONSONANTS = ( "b", "by", "ch", @@ -44,9 +59,9 @@ "w", "y", "z", -] -OjtUnknown = Literal["xx"] -OjtPhoneme = OjtVowel | OjtConsonant | OjtUnknown +) +_OJT_UNKNOWNS = ("xx",) +_OJT_PHONEMES = _OJT_VOWELS + _OJT_CONSONANTS + _OJT_UNKNOWNS @dataclass @@ -82,10 +97,16 @@ def from_feature(cls, feature: str) -> Self: return cls(contexts=contexts) @property - def phoneme(self) -> OjtPhoneme: + def phoneme(self) -> Vowel | Consonant | Literal["sil"]: """このラベルに含まれる音素。子音 or 母音 (無音含む)。""" - # FIXME: バリデーションする - return self.contexts["p3"] # type: ignore + p = self.contexts["p3"] + if p not in _OJT_PHONEMES: + raise NonOjtPhonemeError() + elif p == "xx": + raise OjtUnknownPhonemeError() + else: + # NOTE: mypy が型推論に失敗。pyright の推論した型が返り値型と一致することをマニュアル確認済み @2024-01-10 tarepan + return p # type: ignore @property def mora_index(self) -> int: