From 5bd3250efe039095e267bc9c30b89eefa5dc5c03 Mon Sep 17 00:00:00 2001
From: Aidan Pine
Date: Thu, 11 May 2023 10:27:21 -0700
Subject: [PATCH] fix(mappings): output should not be escaped

---
 g2p/mappings/utils.py      | 15 +++++++--------
 g2p/tests/test_mappings.py |  7 +++++++
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/g2p/mappings/utils.py b/g2p/mappings/utils.py
index 1e889393..4a0743f7 100644
--- a/g2p/mappings/utils.py
+++ b/g2p/mappings/utils.py
@@ -429,17 +429,16 @@ def validate(mapping, path):
 
 
 def escape_special_characters(to_escape: Dict[str, str]) -> Dict[str, str]:
-    for k, v in to_escape.items():
-        if isinstance(v, str):
-            escaped = re.escape(v)
-        else:
-            escaped = v
-        if escaped != v:
+    for key in ['in', 'context_before', 'context_after']:
+        if key not in to_escape or not isinstance(to_escape[key], str):
+            continue
+        escaped = re.escape(to_escape[key])
+        if to_escape[key] != escaped:
             LOGGER.debug(
-                f"Escaped special characters in '{v}' with '{escaped}''. Set 'escape_special' "
+                f"Escaped special characters in '{to_escape[key]}' with '{escaped}'. Set 'escape_special' "
                 "to False in your Mapping configuration to disable this."
             )
-        to_escape[k] = escaped
+        to_escape[key] = escaped
     return to_escape
 
 
diff --git a/g2p/tests/test_mappings.py b/g2p/tests/test_mappings.py
index 339bd096..4eb30004 100755
--- a/g2p/tests/test_mappings.py
+++ b/g2p/tests/test_mappings.py
@@ -166,12 +166,19 @@ def test_case_sensitive(self):
     def test_escape_special(self):
         mapping = Mapping([{"in": r"\d", "out": "digit"}])
         mapping_escaped = Mapping([{"in": r"\d", "out": "b"}], escape_special=True)
+        mapping_input_and_output_special_escaped = Mapping([{"in": "&", "out": "&"}], escape_special=True)
+        mapping_specific_from_fpcc = Mapping([{"in": r"^", "out": "A"}, {"in": "o", "out": r"."}], rule_ordering="apply-longest-first", escape_special=True)
         transducer = Transducer(mapping)
         transducer_escaped = Transducer(mapping_escaped)
+        transducer_escaped_input_output = Transducer(mapping_input_and_output_special_escaped)
+        transducer_fpcc = Transducer(mapping_specific_from_fpcc)
         self.assertEqual(transducer("1").output_string, "digit")
         self.assertEqual(transducer(r"\d").output_string, r"\d")
         self.assertEqual(transducer_escaped("1").output_string, "1")
         self.assertEqual(transducer_escaped(r"\d").output_string, "b")
+        self.assertEqual(transducer_escaped_input_output('&').output_string, "&")
+        self.assertEqual(transducer_fpcc("^o").output_string, "A.")
+
 
     def test_norm_form(self):
         mapping_nfc = Mapping([{"in": "a\u0301", "out": "a"}])  # Defaults to NFC