Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/pronounce_digits #150

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,6 @@ venv.bak/
.mypy_cache/

# VSCod(e/ium)
.vscode/
.vscode*
vscode/
*.code-workspace
22 changes: 22 additions & 0 deletions lingua_franca/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
# Names of the formatting functions registered by this module.
# NOTE(review): presumably read by the localization machinery to route each
# name to its per-language implementation -- confirm against the consumer.
_REGISTERED_FUNCTIONS = ("nice_number",
                         "nice_time",
                         "pronounce_number",
                         "pronounce_digits",
                         "nice_response",
                         "nice_duration")

Expand Down Expand Up @@ -296,6 +297,27 @@ def pronounce_number(number, lang=None, places=2, short_scale=True,
"""


@localized_function()
def pronounce_digits(number, lang=None, places=2, all_digits=True, casual=False):
    """
    Pronounce a number's digits, either colloquially or in full

    In English, the colloquial way is usually to read two digits at a time,
    treating each pair as a single number.

    This function has no body of its own; the language-specific
    implementation (e.g. pronounce_digits_en) does the work.
    NOTE(review): dispatch is presumably performed by the
    localized_function decorator -- confirm.

    Examples:
        >>> pronounce_digits(127, all_digits=False)
        'one twenty seven'
        >>> pronounce_digits(127, all_digits=True)
        'one two seven'

    Args:
        number (int|float): the number whose digits should be pronounced
        lang (str, optional): language to pronounce in
            NOTE(review): None presumably selects the default language
            -- confirm.
        places (int): decimal places to read for a float (the English
            implementation reads at most this many)
        all_digits (bool): read every digit, rather than two digits at a time
        casual (bool): in some languages, use a colloquialism for "zero",
            such as "oh"
    Returns:
        (str): the pronounced digits, as shown in the examples above
    """


def nice_date(dt, lang=None, now=None):
"""
Format a datetime to a pronounceable date
Expand Down
80 changes: 80 additions & 0 deletions lingua_franca/lang/format_en.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# limitations under the License.
#

from math import modf

from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \
_FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, _SHORT_ORDINAL_EN, _LONG_ORDINAL_EN
Expand Down Expand Up @@ -302,6 +304,84 @@ def _long_scale(n):
return result


def _grouped_digit_words_en(digits, zero_word):
    """
    Read a string of decimal digits in colloquial two/three digit groups

    Groups are peeled off the right-hand end of the string. A group that
    contains a zero is spelled digit by digit; any other group is read as a
    number with the "hundred and" dropped.

    Args:
        digits (str): non-empty string of decimal digits, no sign
        zero_word (str): word to use for a zero inside a group
    Returns:
        (list of str): the words, in reading order
    """
    hundred = _SHORT_SCALE_EN[100]
    words = []
    while len(digits) > 1:
        # 2 or 4 remaining digits split into pairs, anything else gives up
        # a group of three, so a lone digit is never left at the front
        size = 2 if len(digits) in (2, 4) else 3
        digits, group = digits[:-size], digits[-size:]

        if '0' in group:
            group_words = [zero_word if digit == '0'
                           else pronounce_number_en(int(digit))
                           for digit in group]
        else:
            spoken = pronounce_number_en(int(group)).split(" ")
            group_words = []
            for position, word in enumerate(spoken):
                followed_by_and = spoken[position + 1:position + 2] == ['and']
                # "three hundred and forty five" -> "three forty five"
                if word == 'and' or (word == hundred and followed_by_and):
                    continue
                group_words.append(word)
        words = group_words + words

    if digits:
        # only reached when the whole input was a single digit
        words.insert(0, pronounce_number_en(int(digits)))
    return words


def pronounce_digits_en(number, places=2, all_digits=True, casual=False):
    """
    Pronounce a number's digits in English, one by one or in small groups

    Examples:
        pronounce_digits_en(12345, all_digits=True)
            -> 'one two three four five'
        pronounce_digits_en(12345, all_digits=False)
            -> 'twelve three forty five'
        pronounce_digits_en(2806, all_digits=False, casual=True)
            -> 'twenty eight oh six'

    Args:
        number (int|float): the number to read out
        places (int): handed to pronounce_number_en when reading the
            fractional part of a float
        all_digits (bool): read the integer part digit by digit instead of
            in groups of two or three digits
        casual (bool): say "oh" instead of "zero" for zeros inside a
            multi-digit integer part; a lone 0 and the fractional part are
            always read by pronounce_number_en
    Returns:
        (str): the spoken form of the number
    Raises:
        ValueError: for floats whose str() form is not plain decimal
            notation (exponent notation, inf, nan)
    """
    # TODO make this part of common data?
    zero_word = "oh" if casual else "zero"

    text = str(number)
    negative = text.startswith("-")
    if negative:
        # read the magnitude and prefix the sign, so that "-" is never
        # handed to int() as if it were a digit
        text = text[1:]

    is_float = isinstance(number, float)
    decimal_words = ""
    if is_float:
        if not text.replace(".", "", 1).isdigit():
            raise ValueError(
                "cannot pronounce the digits of {!r}".format(number))
        integer_part, fraction_digits = text.split(".")
        decimal_words = pronounce_number_en(
            float("." + fraction_digits), places=places)
        # Keep "point ..." only; the integer part is read separately.
        # (str.lstrip("zero ") would strip a character set, not this prefix.)
        # NOTE(review): when the fraction is .0 the helper may return no
        # "point" at all; its output is appended unchanged -- confirm.
        if decimal_words.startswith("zero point"):
            decimal_words = decimal_words[len("zero "):]
    else:
        integer_part = text

    if not all_digits:
        words = _grouped_digit_words_en(integer_part, zero_word)
    elif len(integer_part) > 1:
        words = [zero_word if digit == '0'
                 else pronounce_number_en(int(digit))
                 for digit in integer_part]
    else:
        words = [pronounce_number_en(int(integer_part))]

    if decimal_words:
        words.append(decimal_words)
    if negative:
        # NOTE(review): "minus" is assumed to match the sign word used by
        # pronounce_number_en -- confirm.
        words.insert(0, "minus")

    return " ".join(words)


def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
"""
Format a time to a comfortable human format
Expand Down
42 changes: 42 additions & 0 deletions test/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from lingua_franca.format import nice_year
from lingua_franca.format import nice_duration
from lingua_franca.format import pronounce_number
from lingua_franca.format import pronounce_digits
from lingua_franca.format import date_time_format
from lingua_franca.format import join_list

Expand Down Expand Up @@ -387,6 +388,47 @@ def test_ordinals(self):
# def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
# use_ampm=False):

class TestPronounceDigits(unittest.TestCase):
    """Expected output of pronounce_digits for integers and floats."""

    # keyword sets shared by the case tables below
    GROUPED = {"all_digits": False}
    GROUPED_CASUAL = {"all_digits": False, "casual": True}
    EVERY_DIGIT = {"all_digits": True}

    def _check(self, cases):
        """Assert each (number, keyword arguments, expected) triple in order."""
        for number, options, expected in cases:
            self.assertEqual(pronounce_digits(number, **options), expected)

    def test_integers(self):
        self._check([
            (0, self.GROUPED, "zero"),
            (1, self.GROUPED, "one"),
            (12345, self.GROUPED, "twelve three forty five"),
            (7395, self.GROUPED, "seventy three ninety five"),
            (286, self.GROUPED, "two eighty six"),
            (2806, self.GROUPED, "twenty eight zero six"),
            (2806, self.GROUPED_CASUAL, "twenty eight oh six"),
            (20806, self.GROUPED, "two zero eight zero six"),
            (20806, self.GROUPED_CASUAL, "two oh eight oh six"),
            (311412, self.GROUPED, "three eleven four twelve"),
            (354808912, self.GROUPED,
             "three fifty four eight zero eight nine twelve"),
            (238513096, self.GROUPED,
             "two thirty eight five thirteen zero nine six"),
            (238513696, self.GROUPED,
             "two thirty eight five thirteen six ninety six"),
        ])

    def test_integers_all_digits(self):
        self._check([
            (0, self.EVERY_DIGIT, "zero"),
            (1, self.EVERY_DIGIT, "one"),
            (12345, self.EVERY_DIGIT, "one two three four five"),
            (7395, self.EVERY_DIGIT, "seven three nine five"),
        ])

    def test_floats(self):
        self._check([
            (0.1, self.GROUPED, "zero point one"),
            (0.48, self.GROUPED, "zero point four eight"),
            (6.40, self.GROUPED, "six point four"),
            (56.92, self.GROUPED, "fifty six point nine two"),
        ])

    def test_floats_all_digits(self):
        self._check([
            (0.7, self.EVERY_DIGIT, "zero point seven"),
            (6.04, self.EVERY_DIGIT, "six point zero four"),
            (6.40, self.EVERY_DIGIT, "six point four"),
            (56.92, self.EVERY_DIGIT, "five six point nine two"),
        ])

    def test_decimal_places(self):
        self._check([
            (34.6912, {"all_digits": False},
             "thirty four point six nine"),
            (34.6912, {"all_digits": False, "places": 3},
             "thirty four point six nine one"),
            (34.6912, {"all_digits": False, "places": 4},
             "thirty four point six nine one two"),
            (34.6912, {"all_digits": False, "places": 5},
             "thirty four point six nine one two"),
            (34.6912, {"all_digits": True, "places": 4},
             "three four point six nine one two"),
        ])


class TestNiceDateFormat(unittest.TestCase):
@classmethod
Expand Down