From 839c9e73b1026763932f34c407bcb7df68b796b6 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 30 Apr 2024 10:52:31 +0800 Subject: [PATCH] updates --- pygmt/encodings.py | 15 ++++++++++++--- pygmt/helpers/utils.py | 21 +++++++++------------ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pygmt/encodings.py b/pygmt/encodings.py index d006e1ca031..1bfe264b230 100644 --- a/pygmt/encodings.py +++ b/pygmt/encodings.py @@ -5,7 +5,7 @@ >>> from pygmt.encodings import charset >>> ->>> mappings = charset["symbol"] +>>> mappings = charset["Symbol"] >>> >>> undefined = "\ufffd" >>> markdown = "| octal | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |\n" @@ -35,7 +35,7 @@ # same issue. # 3. Character \140 does not appear in Unicode. # 4. \ufffd means the character is undefined. -charset["symbol"] = dict( +charset["Symbol"] = dict( zip( [*range(0o040, 0o200), *range(0o240, 0o400)], "\u0020\u0021\u2200\u0023\u2203\u0025\u0026\u220b" @@ -71,7 +71,7 @@ # References: # 1. https://en.wikipedia.org/wiki/Zapf_Dingbats # 2. https://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt -charset["zdingbat"] = dict( +charset["ZapfDingbats"] = dict( zip( [*range(0o040, 0o220), *range(0o240, 0o400)], "\u0020\u2701\u2702\u2703\u2704\u260e\u2706\u2707" @@ -108,12 +108,21 @@ # # References: # 1. https://en.wikipedia.org/wiki/ISO/IEC_8859-1 +# 2. https://en.wikipedia.org/wiki/PostScript_Latin_1_Encoding # 2. https://docs.generic-mapping-tools.org/dev/reference/octal-codes.html # 3. https://www.adobe.com/jp/print/postscript/pdfs/PLRM.pdf # 4. https://github.com/adobe-type-tools/agl-aglfn/blob/master/aglfn.txt charset["ISOLatin1+"] = { i: chr(i) for i in [*range(0o040, 0o177), *range(0o241, 0o400)] } +# \047 and \140 are apostrophe (') and backtick (`) in ISO-8859-1, but are right/left +# single quotation marks (’ and ‘) in ISOLatin1+ Encoding. # noqa: RUF003 +charset["ISOLatin1+"].update( + { + 0o140: "\u2018", # LEFT SINGLE QUOTATION MARK ‘ # noqa: RUF003 + 0o047: "\u2019", # RIGHT SINGLE QUOTATION MARK ’ # noqa: RUF003 + } +) charset["ISOLatin1+"].update( dict( zip( diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index db4dd2224b5..d8c8b71ecdb 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -206,21 +206,21 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data return kind -def non_ascii_to_octal(argstr): +def non_ascii_to_octal(argstr: str) -> str: r""" Translate non-ASCII characters to their corresponding octal codes. - Currently, only characters in the ISOLatin1+ charset and - Symbol/ZapfDingbats fonts are supported. + Currently, only characters in the ISOLatin1+ charset and Symbol/ZapfDingbats fonts + are supported. Parameters ---------- - argstr : str + argstr The string to be translated. Returns ------- - translated_argstr : str + translated_argstr The translated string. Examples @@ -241,14 +241,11 @@ def non_ascii_to_octal(argstr): # Dictionary mapping non-ASCII characters to octal codes mapping = {} # Adobe Symbol charset. - mapping.update({c: f"@~\\{i:03o}@~" for i, c in charset["symbol"].items()}) - # Adobe ZDingbat charset. - mapping.update({c: f"@%34%\\{i:o}@%%" for i, c in charset["zdingbat"].items()}) - - # Adobe ISOLatin1+ charset (i.e., ISO-8859-1 with extensions). + mapping.update({c: f"@~\\{i:03o}@~" for i, c in charset["Symbol"].items()}) + # Adobe ZapfDingbats charset. + mapping.update({c: f"@%34%\\{i:o}@%%" for i, c in charset["ZapfDingbats"].items()}) + # Adobe ISOLatin1+ charset. mapping.update({c: f"\\{i:o}" for i, c in charset["ISOLatin1+"].items()}) - mapping["\u2018"] = "\\140" # LEFT SINGLE QUOTATION MARK ‘ # noqa: RUF003 - mapping["\u2019"] = "\\047" # RIGHT SINGLE QUOTATION MARK ’ # noqa: RUF003 # Remove any printable characters mapping = {k: v for k, v in mapping.items() if k not in string.printable}