Skip to content

Commit

Permalink
fix(mappings): output should not be escaped
Browse files Browse the repository at this point in the history
  • Loading branch information
roedoejet committed May 11, 2023
1 parent c64322f commit 5bd3250
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
15 changes: 7 additions & 8 deletions g2p/mappings/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,17 +429,16 @@ def validate(mapping, path):


def escape_special_characters(to_escape: Dict[str, str]) -> Dict[str, str]:
    """Escape regex special characters in the input-side fields of a rule.

    Only the ``in``, ``context_before`` and ``context_after`` entries are
    passed through ``re.escape``. Every other entry -- notably ``out`` -- is
    left exactly as given, so replacement text is never escaped.

    Args:
        to_escape: Rule dictionary. It is modified in place.

    Returns:
        The same dictionary object that was passed in.
    """
    for key in ("in", "context_before", "context_after"):
        value = to_escape.get(key)
        # Missing keys and non-string values are left untouched.
        if not isinstance(value, str):
            continue
        escaped = re.escape(value)
        # Only log and write back when escaping actually changed something.
        if escaped != value:
            LOGGER.debug(
                f"Escaped special characters in '{value}' with '{escaped}'. Set 'escape_special' "
                "to False in your Mapping configuration to disable this."
            )
            to_escape[key] = escaped
    return to_escape


Expand Down
7 changes: 7 additions & 0 deletions g2p/tests/test_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,19 @@ def test_case_sensitive(self):
def test_escape_special(self):
    """Check how ``escape_special`` affects rule inputs and outputs.

    Each case below pairs a transducer with an input string and the output
    string it is expected to produce.
    """
    unescaped_map = Mapping([{"in": r"\d", "out": "digit"}])
    escaped_map = Mapping([{"in": r"\d", "out": "b"}], escape_special=True)
    ampersand_map = Mapping([{"in": "&", "out": "&"}], escape_special=True)
    fpcc_map = Mapping(
        [{"in": r"^", "out": "A"}, {"in": "o", "out": r"."}],
        rule_ordering="apply-longest-first",
        escape_special=True,
    )

    convert_unescaped = Transducer(unescaped_map)
    convert_escaped = Transducer(escaped_map)
    convert_ampersand = Transducer(ampersand_map)
    convert_fpcc = Transducer(fpcc_map)

    expectations = [
        # Without escaping, a digit is rewritten and a literal "\d" passes through.
        (convert_unescaped, "1", "digit"),
        (convert_unescaped, r"\d", r"\d"),
        # With escaping, only the literal "\d" is rewritten.
        (convert_escaped, "1", "1"),
        (convert_escaped, r"\d", "b"),
        # Special characters in the output must come back unescaped.
        (convert_ampersand, "&", "&"),
        (convert_fpcc, "^o", "A."),
    ]
    for convert, text, expected in expectations:
        self.assertEqual(convert(text).output_string, expected)


def test_norm_form(self):
mapping_nfc = Mapping([{"in": "a\u0301", "out": "a"}]) # Defaults to NFC
Expand Down

0 comments on commit 5bd3250

Please sign in to comment.