GenericMappingTools · seisman · Jul 23, 2024 · Jul 3, 2024 · Jul 3, 2024 · Jul 3, 2024
diff --git a/doc/techref/encodings.md b/doc/techref/encodings.md
@@ -106,3 +106,26 @@ the Unicode character set.
 | **\35x** | &#x27a8; | &#x27a9; | &#x27aa; | &#x27ab; | &#x27ac; | &#x27ad; | &#x27ae; | &#x27af; |
 | **\36x** | &#xfffd; | &#x27b1; | &#x27b2; | &#x27b3; | &#x27b4; | &#x27b5; | &#x27b6; | &#x27b7; |
 | **\37x** | &#x27b8; | &#x27b9; | &#x27ba; | &#x27bb; | &#x27bc; | &#x27bd; | &#x27be; | &#xfffd; |
+
+## ISO/IEC 8859
+
+GMT also supports the ISO/IEC 8859 standard for 8-bit character encodings. Refer to
+<https://en.wikipedia.org/wiki/ISO/IEC_8859> for descriptions of the different parts of the standard.
+
+For a list of the characters in each part of the standard, refer to the following links:
+
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-1>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-2>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-3>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-4>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-5>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-6>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-7>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-8>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-9>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-10>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-11>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-13>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-14>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-15>
+- <https://en.wikipedia.org/wiki/ISO/IEC_8859-16>
diff --git a/examples/gallery/images/rgb_image.py b/examples/gallery/images/rgb_image.py
@@ -38,6 +38,6 @@
         grid=image,
         # Use a map scale where 1 cm on the map equals 1 km on the ground
         projection="x1:100000",
-        frame=[r"WSne+tL@!a¯hain@!a¯, Hawai`i on 9 Aug 2023", "af"],
+        frame=[r"WSne+tLāhainā, Hawai`i on 9 Aug 2023", "af"],
     )
 fig.show()
diff --git a/pygmt/encodings.py b/pygmt/encodings.py
@@ -1,11 +1,11 @@
 """
-Adobe character encodings supported by GMT.
+Character encodings supported by GMT.
 
-Currently, only Adobe Symbol, Adobe ZapfDingbats, and Adobe ISOLatin1+ encodings are
-supported.
+Currently, Adobe Symbol, Adobe ZapfDingbats, Adobe ISOLatin1+ and ISO-8859-x (x can be
+1-11, 13-16) encodings are supported. Adobe Standard+ encoding is not supported.
 
 The corresponding Unicode characters in each Adobe character encoding are generated
-from the mapping table and conversion script in the GMT-octal-codes
+from the mapping tables and conversion scripts in the GMT-octal-codes
 (https://github.com/seisman/GMT-octal-codes) repository. Refer to that repository for
 details.
 
@@ -22,8 +22,11 @@
 - Adobe Symbol: https://en.wikipedia.org/wiki/Symbol_(typeface)
 - Zapf Dingbats: https://en.wikipedia.org/wiki/Zapf_Dingbats
 - Adobe Glyph List: https://github.com/adobe-type-tools/agl-aglfn
+- ISO-8859-x: https://en.wikipedia.org/wiki/ISO/IEC_8859
 """
 
+import codecs
+
 # Dictionary of character mappings for different encodings.
 charset: dict = {}
 
@@ -129,3 +132,12 @@
         strict=False,
     )
 )
+
+# ISO-8859-x charsets, where x can be 1-11 or 13-16.
+for i in range(1, 17):
+    if i == 12:  # ISO-8859-12 was abandoned.
+        continue
+    charset[f"ISO-8859-{i}"] = {
+        code: codecs.decode(bytes([code]), f"iso8859_{i}", errors="replace")
+        for code in [*range(0o040, 0o200), *range(0o240, 0o400)]
+    }
diff --git a/pygmt/helpers/__init__.py b/pygmt/helpers/__init__.py
@@ -18,6 +18,7 @@
     args_in_kwargs,
     build_arg_list,
     build_arg_string,
+    check_encoding,
     data_kind,
     is_nonstr_iter,
     launch_external_viewer,

diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py
@@ -205,7 +205,55 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data
     return kind
 
 
-def non_ascii_to_octal(argstr: str) -> str:
+def check_encoding(argstr: str) -> str:
+    """
+    Check the charset encoding of a string.
+
+    All characters in the string must be in a single charset encoding, otherwise the
+    default ISOLatin1+ encoding is returned. Characters in the Symbol and ZapfDingbats
+    fonts are also checked because they're independent of the charset setting.
+
+    Parameters
+    ----------
+    argstr
+        The string to be checked.
+
+    Returns
+    -------
+    encoding
+        The encoding of the string.
+
+    Examples
+    --------
+    >>> check_encoding("123ABC+-?!")  # ASCII characters only
+    'ISOLatin1+'
+    >>> check_encoding("12AB±β①②")  # Characters in ISOLatin1+
+    'ISOLatin1+'
+    >>> check_encoding("12ABāáâãäåβ①②")  # Characters in ISO-8859-4
+    'ISO-8859-4'
+    >>> check_encoding("12ABŒā")  # Mix characters in ISOLatin1+ (Œ) and ISO-8859-4 (ā)
+    'ISOLatin1+'
+    >>> check_encoding("123AB中文")  # Characters not in any charset encoding
+    'ISOLatin1+'
+    """
+    # Loop through all supported encodings and check if all characters in the string
+    # are in the charset of the encoding. If all characters are in the charset, return
+    # the encoding. The ISOLatin1+ encoding is checked first because it is the default
+    # and most common encoding.
+    adobe_chars = set(charset["Symbol"].values()) | set(
+        charset["ZapfDingbats"].values()
+    )
+    for encoding in ["ISOLatin1+"] + [f"ISO-8859-{i}" for i in range(1, 17)]:
+        if encoding == "ISO-8859-12":  # ISO-8859-12 was abandoned. Skip it.
+            continue
+        if all(c in (set(charset[encoding].values()) | adobe_chars) for c in argstr):
+            return encoding
+    # Return the "ISOLatin1+" encoding if the string contains characters from multiple
+    # charset encodings or contains characters that are not in any charset encoding.
+    return "ISOLatin1+"
+
+
+def non_ascii_to_octal(argstr: str, encoding: str = "ISOLatin1+") -> str:
     r"""
     Translate non-ASCII characters to their corresponding octal codes.
 
@@ -216,6 +264,8 @@ def non_ascii_to_octal(argstr: str) -> str:
     ----------
     argstr
         The string to be translated.
+    encoding
+        The encoding of characters in the string.
 
     Returns
     -------
@@ -232,6 +282,8 @@ def non_ascii_to_octal(argstr: str) -> str:
     '@%34%\\041@%%@%34%\\176@%%@%34%\\241@%%@%34%\\376@%%'
     >>> non_ascii_to_octal("ABC ±120° DEF α ♥")
     'ABC \\261120\\260 DEF @~\\141@~ @%34%\\252@%%'
+    >>> non_ascii_to_octal("12ABāáâãäåβ①②", encoding="ISO-8859-4")
+    '12AB\\340\\341\\342\\343\\344\\345@~\\142@~@%34%\\254@%%@%34%\\255@%%'
     """  # noqa: RUF002
     # Return the string if it only contains printable ASCII characters from 32 to 126.
     if all(32 <= ord(c) <= 126 for c in argstr):
@@ -245,8 +297,8 @@ def non_ascii_to_octal(argstr: str) -> str:
     mapping.update(
         {c: f"@%34%\\{i:03o}@%%" for i, c in charset["ZapfDingbats"].items()}
     )
-    # Adobe ISOLatin1+ charset. Put at the end.
-    mapping.update({c: f"\\{i:03o}" for i, c in charset["ISOLatin1+"].items()})
+    # ISOLatin1+ or ISO-8859-x charset.
+    mapping.update({c: f"\\{i:03o}" for i, c in charset[encoding].items()})
 
     # Remove any printable characters
     mapping = {k: v for k, v in mapping.items() if k not in string.printable}
@@ -323,6 +375,10 @@ def build_arg_list(
     ...     )
     ... )
     ['f1.txt', 'f2.txt', '-A0', '-B', '--FORMAT_DATE_MAP=o dd', '->out.txt']
+    >>> build_arg_list(dict(B="12ABāβ①②"))
+    ['-B12AB\\340@~\\142@~@%34%\\254@%%@%34%\\255@%%', '--PS_CHAR_ENCODING=ISO-8859-4']
+    >>> build_arg_list(dict(B="12ABāβ①②"), confdict=dict(PS_CHAR_ENCODING="ISO-8859-5"))
+    ['-B12AB\\340@~\\142@~@%34%\\254@%%@%34%\\255@%%', '--PS_CHAR_ENCODING=ISO-8859-5']
     >>> print(build_arg_list(dict(R="1/2/3/4", J="X4i", watre=True)))
     Traceback (most recent call last):
       ...
@@ -337,10 +393,17 @@ def build_arg_list(
         elif value is True:
             gmt_args.append(f"-{key}")
         elif is_nonstr_iter(value):
-            gmt_args.extend(non_ascii_to_octal(f"-{key}{_value}") for _value in value)
+            gmt_args.extend(f"-{key}{_value}" for _value in value)
         else:
-            gmt_args.append(non_ascii_to_octal(f"-{key}{value}"))
-    gmt_args = sorted(gmt_args)
+            gmt_args.append(f"-{key}{value}")
+
+    # Convert non-ASCII characters (if any) in the arguments to octal codes
+    encoding = check_encoding(" ".join(gmt_args))
+    gmt_args = sorted([non_ascii_to_octal(arg, encoding=encoding) for arg in gmt_args])
+
+    # Set --PS_CHAR_ENCODING=encoding if necessary
+    if encoding != "ISOLatin1+" and not (confdict and "PS_CHAR_ENCODING" in confdict):
+        gmt_args.append(f"--PS_CHAR_ENCODING={encoding}")
 
     if confdict:
         gmt_args.extend(f"--{key}={value}" for key, value in confdict.items())

diff --git a/pygmt/src/text.py b/pygmt/src/text.py
@@ -7,6 +7,7 @@
 from pygmt.exceptions import GMTInvalidInput
 from pygmt.helpers import (
     build_arg_list,
+    check_encoding,
     data_kind,
     fmt_docstring,
     is_nonstr_iter,
@@ -226,13 +227,23 @@ def text_(  # noqa: PLR0912
         kwargs["t"] = ""
 
     # Append text at last column. Text must be passed in as str type.
+    confdict = {}
     if kind == "vectors":
+        text = np.atleast_1d(text).astype(str)
+        encoding = check_encoding("".join(text))
         extra_arrays.append(
-            np.vectorize(non_ascii_to_octal)(np.atleast_1d(text).astype(str))
+            np.vectorize(non_ascii_to_octal, excluded="encoding")(
+                text, encoding=encoding
+            )
         )
+        if encoding != "ISOLatin1+":
+            confdict = {"PS_CHAR_ENCODING": encoding}
 
     with Session() as lib:
         with lib.virtualfile_in(
             check_kind="vector", data=textfiles, x=x, y=y, extra_arrays=extra_arrays
         ) as vintbl:
-            lib.call_module(module="text", args=build_arg_list(kwargs, infile=vintbl))
+            lib.call_module(
+                module="text",
+                args=build_arg_list(kwargs, infile=vintbl, confdict=confdict),
+            )
diff --git a/pygmt/tests/baseline/test_text_nonascii_iso8859.png.dvc b/pygmt/tests/baseline/test_text_nonascii_iso8859.png.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: a0f35a1d58c95e6589c7397e7660e946
+  size: 17089
+  hash: md5
+  path: test_text_nonascii_iso8859.png
diff --git a/pygmt/tests/test_text.py b/pygmt/tests/test_text.py
@@ -432,3 +432,16 @@ def test_text_quotation_marks():
     fig.basemap(projection="X4c/2c", region=[0, 4, 0, 2], frame=0)
     fig.text(x=2, y=1, text='\\234 ‘ ’ " “ ”', font="20p")  # noqa: RUF001
     return fig
+
+
+@pytest.mark.mpl_image_compare
+def test_text_nonascii_iso8859():
+    """
+    Test passing text strings with non-ASCII characters in ISO-8859-4 encoding.
+    """
+    fig = Figure()
+    fig.basemap(region=[0, 10, 0, 10], projection="X10c", frame=["WSEN+tAāáâãäåB"])
+    fig.text(position="TL", text="position-text:1ÉĘËĖ2")
+    fig.text(x=1, y=1, text="xytext:1éęëė2")
+    fig.text(x=[5, 5], y=[3, 5], text=["xytext1:ųúûüũūαζ∆❡", "xytext2:íîī∑π∇✉"])
+    return fig