From f423a9398f8cccd60a19b2afc22139e7d58401b4 Mon Sep 17 00:00:00 2001 From: Dhaifallah Alwadani Date: Wed, 21 Jun 2017 22:20:22 +0300 Subject: [PATCH 1/6] add support for Arabic --- num2words/__init__.py | 2 + num2words/lang_AR.py | 121 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 num2words/lang_AR.py diff --git a/num2words/__init__.py b/num2words/__init__.py index ae864e74..69ad8dbf 100644 --- a/num2words/__init__.py +++ b/num2words/__init__.py @@ -16,6 +16,7 @@ from __future__ import unicode_literals +from . import lang_AR from . import lang_EN from . import lang_EN_GB from . import lang_EN_IN @@ -39,6 +40,7 @@ CONVERTER_CLASSES = { + 'ar': lang_AR.Num2Word_AR(), 'en': lang_EN.Num2Word_EN(), 'en_GB': lang_EN_GB.Num2Word_EN_GB(), 'en_IN': lang_EN_IN.Num2Word_EN_IN(), diff --git a/num2words/lang_AR.py b/num2words/lang_AR.py new file mode 100644 index 00000000..56c5c4e7 --- /dev/null +++ b/num2words/lang_AR.py @@ -0,0 +1,121 @@ +# Copyright (c) 2003, Taro Ogawa. All Rights Reserved. +# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. + +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA + +from __future__ import division, unicode_literals, print_function +from . import lang_EU + +class Num2Word_AR(lang_EU.Num2Word_EU): + def set_high_numwords(self, high): + max = 3 + 3*len(high) + for word, n in zip(high, range(max, 3, -3)): + self.cards[10**n] = word + "illion" + + def setup(self): + self.negword = "minus " + self.pointword = "point" + self.errmsg_nornum = "Only numbers may be converted to words." + self.exclude_title = ["and", "point", "minus"] + + self.mid_numwords = [(1000000, "مليون"),(1000, "ألف"), (100, "مئة"), + (90, "تسعين"), (80, "ثمانين"), (70, "سبعين"), + (60, "ستين"), (50, "خمسين"), (40, "أربعين"), + (30, "ثلاثين")] + self.low_numwords = ["عشرين", "تسعة عشر", "ثمانية عشر", "سبعة عشر", + "sixteen", "خمسة عشر", "fourteen", "thirteen", + "twelve", "أحد عشر", "ten", "nine", "ثمانية", + "seven", "six", "خمسة", "أربعة", "three", "اثنين", + "واحد", "صفر"] + self.ords = { "one" : "first", + "two" : "second", + "three" : "third", + "five" : "fifth", + "eight" : "eighth", + "nine" : "ninth", + "twelve" : "twelfth" } + + + def merge(self, lpair, rpair): + ltext, lnum = lpair + rtext, rnum = rpair + if lnum == 1 and rnum < 100: + return (rtext, rnum) + elif 100 > lnum > rnum : + return ("%s و%s"%(rtext, ltext), rnum + lnum) + elif lnum >= 100 > rnum: + return ("%s و %s"%(ltext, rtext), lnum + rnum) + elif rnum > lnum: + if lnum == 1 and rnum in [100, 1000]: + return ("%s"%(rtext), rnum * lnum) + if lnum == 2 and rnum == 100: + return ("مئتين", rnum * lnum) + if lnum == 2 and rnum in [100, 1000]: + return ("%sين"%(rtext), rnum * lnum) + return ("%s %s"%(ltext, rtext), lnum * rnum) + return ("%s، %s"%(ltext, rtext), lnum + rnum) + + + def to_ordinal(self, value): + self.verify_ordinal(value) + outwords = self.to_cardinal(value).split(" ") + lastwords = outwords[-1].split("-") + lastword = lastwords[-1].lower() + try: + lastword = self.ords[lastword] + except KeyError: + if lastword[-1] == "y": + lastword = lastword[:-1] + "ie" + lastword += "th" + lastwords[-1] = self.title(lastword) + outwords[-1] = "،".join(lastwords) + return " ".join(outwords) + + + def to_ordinal_num(self, value): + self.verify_ordinal(value) + return "%s%s"%(value, self.to_ordinal(value)[-2:]) + + + def to_year(self, val, longval=True): + if not (val//100)%10: + return self.to_cardinal(val) + return self.to_splitnum(val, hightxt="hundred", jointxt="and", + longval=longval) + + def to_currency(self, val, longval=True): + return self.to_splitnum(val, hightxt="dollar/s", lowtxt="cent/s", + jointxt="and", longval=longval, cents = True) + + +n2w = Num2Word_AR() +to_card = n2w.to_cardinal +to_ord = n2w.to_ordinal +to_ordnum = n2w.to_ordinal_num +to_year = n2w.to_year + +def main(): + for val in [ 1, 11, 12, 21, 31, 33, 71, 80, 81, 91, 99, 100, 101, 102, 155, + 180, 300, 308, 832, 1000, 1001, 1061, 1100, 1500, 1701, 3000, + 8280, 8291, 150000, 500000, 1000000, 2000000, 2000001, + -21212121211221211111, -2.121212, -1.0000100]: + n2w.test(val) + n2w.test(1325325436067876801768700107601001012212132143210473207540327057320957032975032975093275093275093270957329057320975093272950730) + for val in [1,120,1000,1120,1800, 1976,2000,2010,2099,2171]: + print(val, "is", n2w.to_currency(val)) + print(val, "is", n2w.to_year(val)) + + +if __name__ == "__main__": + main() From 79215605140bcfbc40dad095f844652e914fd48e Mon Sep 17 00:00:00 2001 From: Dhaifallah Alwadani Date: Wed, 21 Jun 2017 22:22:27 +0300 Subject: [PATCH 2/6] Add arabic to README --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index a3808e74..606b1ca8 100644 --- a/README.rst +++ b/README.rst @@ -45,6 +45,7 @@ Besides the numerical argument, there's two optional arguments. **lang:** The language in which to convert the number. Supported values are: * ``en`` (English, default) +* ``ar`` (Arabic) * ``fr`` (French) * ``de`` (German) * ``es`` (Spanish) From eea901734a7b37d89170c1eddc9cf5c2446f9762 Mon Sep 17 00:00:00 2001 From: Dhaifallah Alwadani Date: Thu, 22 Jun 2017 23:22:33 +0300 Subject: [PATCH 3/6] fix encoding --- num2words/lang_AR.py | 1 + 1 file changed, 1 insertion(+) diff --git a/num2words/lang_AR.py b/num2words/lang_AR.py index 56c5c4e7..7c748b0a 100644 --- a/num2words/lang_AR.py +++ b/num2words/lang_AR.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright (c) 2003, Taro Ogawa. All Rights Reserved. # Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. From 6c3e75d4c826c952d198a5350b5e868ce2611d14 Mon Sep 17 00:00:00 2001 From: Dhaifallah Alwadani Date: Thu, 22 Jun 2017 23:48:44 +0300 Subject: [PATCH 4/6] fix missing words --- num2words/lang_AR.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/num2words/lang_AR.py b/num2words/lang_AR.py index 7c748b0a..aee780ba 100644 --- a/num2words/lang_AR.py +++ b/num2words/lang_AR.py @@ -25,27 +25,28 @@ def set_high_numwords(self, high): self.cards[10**n] = word + "illion" def setup(self): - self.negword = "minus " - self.pointword = "point" + self.negword = "سالب " + self.pointword = "فاصلة" self.errmsg_nornum = "Only numbers may be converted to words." - self.exclude_title = ["and", "point", "minus"] + self.exclude_title = ["و", "فاصلة", "سالب"] self.mid_numwords = [(1000000, "مليون"),(1000, "ألف"), (100, "مئة"), (90, "تسعين"), (80, "ثمانين"), (70, "سبعين"), (60, "ستين"), (50, "خمسين"), (40, "أربعين"), (30, "ثلاثين")] self.low_numwords = ["عشرين", "تسعة عشر", "ثمانية عشر", "سبعة عشر", - "sixteen", "خمسة عشر", "fourteen", "thirteen", - "twelve", "أحد عشر", "ten", "nine", "ثمانية", - "seven", "six", "خمسة", "أربعة", "three", "اثنين", + "ستة عشر", "خمسة عشر", "أربعة عشر", "ثلاثة عشر", + "اثناعشر", "أحد عشر", "عشرة", "تسعة", "ثمانية", + "سبعة", "ستة", "خمسة", "أربعة", "ثلاثة", "اثنين", "واحد", "صفر"] - self.ords = { "one" : "first", - "two" : "second", - "three" : "third", - "five" : "fifth", - "eight" : "eighth", - "nine" : "ninth", - "twelve" : "twelfth" } + self.ords = { "واحد" : "أول", + "اثنين" : "ثاني", + "ثلاثة" : "ثالث", + "أربعة": "رابع", + "خمسة" : "خامس", + "ثمانية" : "ثامن", + "تسعة" : "تاسع", + "اثناعشر" : "ثاني عشر" } def merge(self, lpair, rpair): @@ -58,7 +59,7 @@ def merge(self, lpair, rpair): elif lnum >= 100 > rnum: return ("%s و %s"%(ltext, rtext), lnum + rnum) elif rnum > lnum: - if lnum == 1 and rnum in [100, 1000]: + if lnum == 1 and rnum in [100, 1000, 1000000]: return ("%s"%(rtext), rnum * lnum) if lnum == 2 and rnum == 100: return ("مئتين", rnum * lnum) @@ -76,9 +77,7 @@ def to_ordinal(self, value): try: lastword = self.ords[lastword] except KeyError: - if lastword[-1] == "y": - lastword = lastword[:-1] + "ie" - lastword += "th" + lastword += "" lastwords[-1] = self.title(lastword) outwords[-1] = "،".join(lastwords) return " ".join(outwords) From 7f3f65d9f4c794d293242271d099e00c84298555 Mon Sep 17 00:00:00 2001 From: Dhaifallah Alwadani Date: Thu, 22 Jun 2017 23:49:07 +0300 Subject: [PATCH 5/6] Add arabic tests --- tests/test_ar.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 tests/test_ar.py diff --git a/tests/test_ar.py b/tests/test_ar.py new file mode 100644 index 00000000..6b0f6e74 --- /dev/null +++ b/tests/test_ar.py @@ -0,0 +1,62 @@ +# encoding: UTF-8 +# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. + +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA +from __future__ import unicode_literals + +from unittest import TestCase + +from num2words import num2words + +TEST_CASES_CARDINAL = ( + (1, 'واحد'), + (2, 'اثنين'), + (11, 'أحد عشر'), + (12, 'اثناعشر'), + (20, 'عشرين'), + (21, 'واحد وعشرين'), + (26, 'ستة وعشرين'), + (30, 'ثلاثين'), + (67, 'سبعة وستين'), + (70, 'سبعين'), + (100, 'مئة'), + (101, 'مئة و واحد'), + (199, 'مئة و تسعة وتسعين'), + (203, 'مئتين و ثلاثة'), + (1000, 'ألف'), + (1001, 'ألف و واحد'), + (1097, 'ألف و سبعة وتسعين'), + (1000000, 'مليون'), + (1000001, 'مليون و واحد'), +) + +TEST_CASES_ORDINAL = ( + (1, 'أول'), + (8, 'ثامن'), + (12, 'ثاني عشر'), + (100, 'مئة'), +) + +class Num2WordsARTest(TestCase): + + def test_number(self): + for test in TEST_CASES_CARDINAL: + self.assertEqual(num2words(test[0], lang='ar'), test[1]) + + def test_ordinal(self): + for test in TEST_CASES_ORDINAL: + self.assertEqual( + num2words(test[0], lang='ar', ordinal=True), + test[1] + ) From 21db1b32c3778e314a7cebc3cd42aca11119f4b0 Mon Sep 17 00:00:00 2001 From: Dhaifallah Alwadani Date: Wed, 28 Jun 2017 16:33:17 +0300 Subject: [PATCH 6/6] Set currencies and fix strings --- num2words/lang_AR.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/num2words/lang_AR.py b/num2words/lang_AR.py index aee780ba..a43c9686 100644 --- a/num2words/lang_AR.py +++ b/num2words/lang_AR.py @@ -91,12 +91,12 @@ def to_ordinal_num(self, value): def to_year(self, val, longval=True): if not (val//100)%10: return self.to_cardinal(val) - return self.to_splitnum(val, hightxt="hundred", jointxt="and", + return self.to_splitnum(val, hightxt="مئة", jointxt="و", longval=longval) def to_currency(self, val, longval=True): - return self.to_splitnum(val, hightxt="dollar/s", lowtxt="cent/s", - jointxt="and", longval=longval, cents = True) + return self.to_splitnum(val, hightxt="ريال", lowtxt="هللة", + jointxt="و", longval=longval, cents = True) n2w = Num2Word_AR()