From 75ac76f805196e05be9b9d42552685894c35450e Mon Sep 17 00:00:00 2001 From: Joe Stewart Date: Fri, 8 May 2020 12:23:43 -0400 Subject: [PATCH 1/2] Rename mathematical character homoglyph ranges --- homoglyphs/categories.json | 85 +++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/homoglyphs/categories.json b/homoglyphs/categories.json index bdde119..6ca57db 100644 --- a/homoglyphs/categories.json +++ b/homoglyphs/categories.json @@ -80,6 +80,7 @@ "MANICHAEAN", "MARCHEN", "MASARAM_GONDI", + "MATH", "MEDEFAIDRIN", "MEETEI_MAYEK", "MENDE_KIKAKUI", @@ -9582,207 +9583,207 @@ [ 119808, 119892, - "COMMON" + "MATH" ], [ 119894, 119964, - "COMMON" + "MATH" ], [ 119966, 119967, - "COMMON" + "MATH" ], [ 119970, 119970, - "COMMON" + "MATH" ], [ 119973, 119974, - "COMMON" + "MATH" ], [ 119977, 119980, - "COMMON" + "MATH" ], [ 119982, 119993, - "COMMON" + "MATH" ], [ 119995, 119995, - "COMMON" + "MATH" ], [ 119997, 120003, - "COMMON" + "MATH" ], [ 120005, 120069, - "COMMON" + "MATH" ], [ 120071, 120074, - "COMMON" + "MATH" ], [ 120077, 120084, - "COMMON" + "MATH" ], [ 120086, 120092, - "COMMON" + "MATH" ], [ 120094, 120121, - "COMMON" + "MATH" ], [ 120123, 120126, - "COMMON" + "MATH" ], [ 120128, 120132, - "COMMON" + "MATH" ], [ 120134, 120134, - "COMMON" + "MATH" ], [ 120138, 120144, - "COMMON" + "MATH" ], [ 120146, 120485, - "COMMON" + "MATH" ], [ 120488, 120512, - "COMMON" + "MATH" ], [ 120513, 120513, - "COMMON" + "MATH" ], [ 120514, 120538, - "COMMON" + "MATH" ], [ 120539, 120539, - "COMMON" + "MATH" ], [ 120540, 120570, - "COMMON" + "MATH" ], [ 120571, 120571, - "COMMON" + "MATH" ], [ 120572, 120596, - "COMMON" + "MATH" ], [ 120597, 120597, - "COMMON" + "MATH" ], [ 120598, 120628, - "COMMON" + "MATH" ], [ 120629, 120629, - "COMMON" + "MATH" ], [ 120630, 120654, - "COMMON" + "MATH" ], [ 120655, 120655, - "COMMON" + "MATH" ], [ 120656, 120686, - "COMMON" + "MATH" ], [ 120687, 120687, - "COMMON" + "MATH" ], [ 120688, 120712, 
- "COMMON" + "MATH" ], [ 120713, 120713, - "COMMON" + "MATH" ], [ 120714, 120744, - "COMMON" + "MATH" ], [ 120745, 120745, - "COMMON" + "MATH" ], [ 120746, 120770, - "COMMON" + "MATH" ], [ 120771, 120771, - "COMMON" + "MATH" ], [ 120772, 120779, - "COMMON" + "MATH" ], [ 120782, 120831, - "COMMON" + "MATH" ], [ 120832, @@ -10420,4 +10421,4 @@ "INHERITED" ] ] -} \ No newline at end of file +} From 78d0aee1441fbf7b7770a3a59d10a4afe9de4fe2 Mon Sep 17 00:00:00 2001 From: Joe Stewart Date: Fri, 8 May 2020 12:25:57 -0400 Subject: [PATCH 2/2] provide a default category name to avoid a ValueError exception if none exists --- homoglyphs/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/homoglyphs/core.py b/homoglyphs/core.py index abd5ec3..519c103 100644 --- a/homoglyphs/core.py +++ b/homoglyphs/core.py @@ -63,7 +63,7 @@ def detect(cls, char): # try detect category by unicodedata try: - category = unicodedata.name(char).split()[0] + category = unicodedata.name(char, 'UNKNOWN').split()[0] except TypeError: # In Python2 unicodedata.name raise error for non-unicode chars pass