Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support non-ASCII characters in function arguments #2584

Merged
merged 37 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
75778bf
Support non-ASCII characters
seisman Jun 24, 2023
e840c99
Support all ISOlatin1 characters
seisman Jun 24, 2023
1b6d940
Support more ISOLatin1+ characters
seisman Jun 25, 2023
1b88cd6
fix
seisman Jun 25, 2023
311b976
Update pygmt/helpers/utils.py
seisman Jun 26, 2023
672413b
Refactor to make it more readable
seisman Jun 26, 2023
2dcc288
Need to remove single quote
seisman Jun 26, 2023
c9b8254
[ci skip] Use a better reference for ASCII table
seisman Jun 26, 2023
e2947fa
Support Symbols charset
seisman Jun 26, 2023
1a84634
Support ZapfDingbats charset
seisman Jun 26, 2023
636ace0
Refactor and add more doctests
seisman Jun 26, 2023
ffabaee
Fix a symbol which is incorrectly copied from PDF
seisman Jun 27, 2023
e1b43b2
Replace octal codes with non-ASCII character in two examples
seisman Jun 27, 2023
7ea78da
Fix a typo in doctest
seisman Jun 27, 2023
487f2d8
Merge branch 'main' into non-ascii-support
seisman Jun 30, 2023
288486c
Fix some characters
seisman Jun 30, 2023
97a223d
Fix symbol characters
seisman Jun 30, 2023
4ff2e56
Add one more reference
seisman Jun 30, 2023
388109f
Add two more references
seisman Jun 30, 2023
2270d9d
Update ZapfDingbats charset
seisman Jul 1, 2023
117b6e5
Make it clear that Symbol/ZapfDingbats are from Adobe
seisman Jul 2, 2023
1798ccc
Update for ISOLatin1+ charset
seisman Jul 2, 2023
e4bedb9
Fix registered sign, copyright sign and trade mark sign
seisman Jul 2, 2023
5ef84f9
Add more notes
seisman Jul 2, 2023
490535c
Add a test for non-ascii support
seisman Jul 2, 2023
418adc5
Update the dvc file
seisman Jul 2, 2023
4b66b8a
Fix styling issue
seisman Jul 2, 2023
37b0c6a
Add docstrings
seisman Jul 2, 2023
3362b31
Update examples/gallery/embellishments/colorbar.py
seisman Jul 22, 2023
695c59b
Merge branch 'main' into non-ascii-support
seisman Aug 2, 2023
bbc223b
Remove an unused pylint directive
seisman Aug 2, 2023
35306bf
Fix a typo in doctest
seisman Aug 2, 2023
e56359e
Merge branch 'main' into non-ascii-support
seisman Aug 5, 2023
633b2f9
Merge branch 'main' into non-ascii-support
seisman Aug 16, 2023
cdb4ab1
Merge branch 'main' into non-ascii-support
seisman Aug 17, 2023
646465c
Update pygmt/helpers/utils.py
seisman Aug 21, 2023
9ce0217
Merge branch 'main' into non-ascii-support
seisman Aug 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/gallery/embellishments/colorbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
# with a length/width (+w) of 4 cm by 0.5 cm, and plotted horizontally (+h)
position="g0.3/8.7+w4c/0.5c+h",
box=True,
frame=["x+lTemperature", r"y+l\260C"],
frame=["x+lTemperature", "y+l°C"],
scale=100,
)

Expand Down
5 changes: 2 additions & 3 deletions examples/gallery/symbols/text_symbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,8 @@
# plot a lowercase "s" of size 3.5c and use the "Times-Italic" font,
# color fill is set to "gold"
fig.plot(x=5.5, y=1.5, style="l3.5c+ts+fTimes-Italic", fill="gold", pen=pen)
# plot the pi symbol (\160 is octal code for pi) of size 3.5c, for this use
# the "Symbol" font, the outline color of the symbol is set to
# plot the pi symbol of size 3.5c, the outline color of the symbol is set to
# "darkorange", the color fill is set to "magenta4"
fig.plot(x=7, y=1.5, style="l3.5c+t\160+fSymbol,darkorange", fill="magenta4", pen=pen)
fig.plot(x=7, y=1.5, style="l3.5c+tπ+fdarkorange", fill="magenta4", pen=pen)

fig.show()
116 changes: 115 additions & 1 deletion pygmt/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import pathlib
import shutil
import string
import subprocess
import sys
import time
Expand Down Expand Up @@ -145,6 +146,119 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False):
return kind


def non_ascii_to_octal(argstr):
    r"""
    Translate non-ASCII characters to their corresponding octal codes.

    Currently, only characters in the ISOLatin1+ charset and
    Symbol/ZapfDingbats fonts are supported.

    Characters from the Symbol charset are replaced by ``@~\ooo@~``,
    characters from the ZapfDingbats charset by ``@%34%\ooo@%%`` and
    characters from the ISOLatin1+ charset by a bare ``\ooo``, where ``ooo``
    is the octal code of the character in that charset. If a character
    belongs to more than one charset, ISOLatin1+ takes precedence over
    ZapfDingbats, which takes precedence over Symbol. Printable ASCII
    characters and characters that are in none of the charsets are left
    unchanged.

    Parameters
    ----------
    argstr : str
        The string to be translated.

    Returns
    -------
    str
        The translated string.

    Examples
    --------
    >>> non_ascii_to_octal("•‰“”±°ÿ")
    '\\31\\214\\216\\217\\261\\260\\377'
    >>> non_ascii_to_octal("αζΔΩ∑π∇")
    '@~\\141@~@~\\172@~@~\\104@~@~\\127@~@~\\345@~@~\\160@~@~\\321@~'
    >>> non_ascii_to_octal("✁❞❡➾")
    '@%34%\\41@%%@%34%\\176@%%@%34%\\241@%%@%34%\\376@%%'
    >>> non_ascii_to_octal("ABC ±120° DEF α ♥")
    'ABC \\261120\\260 DEF @~\\141@~ @%34%\\252@%%'
    >>> non_ascii_to_octal("ABC 123")
    'ABC 123'
    """
    # Dictionary mapping non-ASCII characters to octal codes.
    # Each charset below is given as a string of characters that is zipped
    # with the list of character codes, so the N-th character of the string
    # MUST correspond to the N-th code of the list. A missing character
    # silently shifts every following character to the wrong code.
    mapping = {}

    # Adobe Symbol charset
    # References:
    # 1. https://en.wikipedia.org/wiki/Symbol_(typeface)
    # 2. https://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/symbol.txt
    # Notes:
    # 1. \322 and \342 are "REGISTERED SIGN SERIF" and
    #    "REGISTERED SIGN SANS SERIF" respectively, but only "REGISTERED SIGN"
    #    is available in the unicode table. So both are mapped to
    #    "REGISTERED SIGN". \323, \343, \324 and \344 also have the same
    #    problem. Because the later entry wins, the three signs end up with
    #    the codes \342-\344 in this table.
    # 2. \140, \275, \276, \346-\357, \364 and \366-\376 have no regular
    #    unicode character. They are written as explicit escapes of the
    #    private-use code points listed in reference 2, because the literal
    #    characters are invisible (or shown as boxes) in many editors and web
    #    viewers and are easily dropped when the file is copied.
    # 3. The angle brackets (\341 and \361) are written as escapes as well,
    #    since U+2329/U+232A are easily confused with look-alike characters.
    mapping.update(
        {
            c: "@~\\" + format(i, "o") + "@~"
            for c, i in zip(
                " !∀#∃%&∋()∗+,−./"  # \04x-05x
                + "0123456789:;<=>?"  # \06x-07x
                + "≅ΑΒΧΔΕΦΓΗΙϑΚΛΜΝΟ"  # \10x-11x
                + "ΠΘΡΣΤΥςΩΞΨΖ[∴]⊥_"  # \12x-13x
                + "\uf8e5αβχδεφγηιϕκλμνο"  # \14x-15x
                + "πθρστυϖωξψζ{|}∼"  # \16x-17x. \177 is undefined
                + "€ϒ′≤⁄∞ƒ♣♦♥♠↔←↑→↓"  # \24x-\25x
                + "°±″≥×∝∂•÷≠≡≈…\uf8e6\uf8e7↵"  # \26x-27x
                + "ℵℑℜ℘⊗⊕∅∩∪⊃⊇⊄⊂⊆∈∉"  # \30x-31x
                + "∠∇®©™∏√⋅¬∧∨⇔⇐⇑⇒⇓"  # \32x-33x
                + "◊\u2329®©™∑"  # \340-\345
                + "\uf8eb\uf8ec\uf8ed\uf8ee\uf8ef"  # \346-\352
                + "\uf8f0\uf8f1\uf8f2\uf8f3\uf8f4"  # \353-\357
                + "\u232a∫⌠\uf8f5⌡"  # \361-\365. \360 is undefined
                + "\uf8f6\uf8f7\uf8f8\uf8f9\uf8fa"  # \366-\372
                + "\uf8fb\uf8fc\uf8fd\uf8fe",  # \373-\376. \377 is undefined
                [*range(32, 127), *range(160, 240), *range(241, 255)],
            )
        }
    )

    # Adobe ZapfDingbats charset
    # References:
    # 1. https://en.wikipedia.org/wiki/Zapf_Dingbats
    # 2. https://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
    mapping.update(
        {
            c: "@%34%\\" + format(i, "o") + "@%%"
            for c, i in zip(
                " ✁✂✃✄☎✆✇✈✉☛☞✌✍✎✏"  # \04x-\05x
                + "✐✑✒✓✔✕✖✗✘✙✚✛✜✝✞✟"  # \06x-\07x
                + "✠✡✢✣✤✥✦✧★✩✪✫✬✭✮✯"  # \10x-\11x
                + "✰✱✲✳✴✵✶✷✸✹✺✻✼✽✾✿"  # \12x-\13x
                + "❀❁❂❃❄❅❆❇❈❉❊❋●❍■❏"  # \14x-\15x
                + "❐❑❒▲▼◆❖◗❘❙❚❛❜❝❞"  # \16x-\17x. \177 is undefined
                + "❡❢❣❤❥❦❧♣♦♥♠①②③④"  # \24x-\25x. \240 is undefined
                + "⑤⑥⑦⑧⑨⑩❶❷❸❹❺❻❼❽❾❿"  # \26x-\27x
                + "➀➁➂➃➄➅➆➇➈➉➊➋➌➍➎➏"  # \30x-\31x
                + "➐➑➒➓➔→↔↕➘➙➚➛➜➝➞➟"  # \32x-\33x
                + "➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯"  # \34x-\35x
                + "➱➲➳➴➵➶➷➸➹➺➻➼➽➾",  # \36x-\37x. \360 and \377 are undefined
                [*range(32, 127), *range(161, 240), *range(241, 255)],
            )
        }
    )

    # Adobe ISOLatin1+ charset (i.e., ISO-8859-1 with extensions)
    # References:
    # 1. https://en.wikipedia.org/wiki/ISO/IEC_8859-1
    # 2. https://docs.generic-mapping-tools.org/dev/cookbook/octal-codes.html
    # 3. https://www.adobe.com/jp/print/postscript/pdfs/PLRM.pdf
    # Note: \036 is the single-character "fi" ligature (U+FB01). It is written
    # as an escape so that it cannot be mistaken for the two letters "f", "i".
    mapping.update(
        {
            c: "\\" + format(i, "o")
            for c, i in zip(
                "•…™—–\ufb01ž"  # \03x. \030 is undefined
                + "š"  # \177
                + "Œ†‡Ł⁄‹Š›œŸŽł‰„“”"  # \20x-\21x
                + "ı`´ˆ˜¯˘˙¨‚˚¸'˝˛ˇ",  # \22x-\23x
                [*range(25, 32), *range(127, 160)],
            )
        }
    )
    # \240-\377
    mapping.update({chr(i): "\\" + format(i, "o") for i in range(160, 256)})

    # Remove any printable characters, so that plain ASCII text (including
    # the ASCII characters that are part of the charsets above) is kept as is.
    mapping = {k: v for k, v in mapping.items() if k not in string.printable}
    return argstr.translate(str.maketrans(mapping))


def build_arg_string(kwdict, confdict=None, infile=None, outfile=None):
r"""
Convert keyword dictionaries and input/output files into a GMT argument
Expand Down Expand Up @@ -267,7 +381,7 @@ def build_arg_string(kwdict, confdict=None, infile=None, outfile=None):
gmt_args = [str(infile)] + gmt_args
if outfile:
gmt_args.append("->" + str(outfile))
return " ".join(gmt_args)
return non_ascii_to_octal(" ".join(gmt_args))


def is_nonstr_iter(value):
Expand Down
4 changes: 4 additions & 0 deletions pygmt/tests/baseline/test_non_ascii_to_octal.png.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
outs:
- md5: d93bed7495d77cd2ef7cc1b64edb9b3a
size: 19563
path: test_non_ascii_to_octal.png
19 changes: 19 additions & 0 deletions pygmt/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
import pytest
import xarray as xr
from pygmt import Figure
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import (
GMTTempFile,
Expand Down Expand Up @@ -57,6 +58,24 @@ def test_unique_name():
assert len(names) == len(set(names))


@pytest.mark.mpl_image_compare
def test_non_ascii_to_octal():
    """
    Plot a basemap whose axis labels and title contain non-ASCII characters.

    The x-axis label, the y-axis label and the title are labelled
    "ISOLatin1", "Symbol" and "ZapfDingbats" respectively and each carries a
    few non-ASCII characters. The figure is returned so that it can be
    compared with the baseline image.
    """
    frame_settings = [
        "xaf+lISOLatin1: ﬁ‰“”¥",
        "yaf+lSymbol: αβ∇∋∈",
        "WSen+tZapfDingbats: ①❷➂➍✦❝❞",
    ]
    fig = Figure()
    fig.basemap(region=[0, 10, 0, 5], projection="X10c/5c", frame=frame_settings)
    return fig


def test_kwargs_to_strings_fails():
"""
Make sure it fails for invalid conversion types.
Expand Down
Loading