Skip to content

Commit

Permalink
update unicode version
Browse files Browse the repository at this point in the history
  • Loading branch information
TimWhiting committed Feb 4, 2024
1 parent 43f9d01 commit b2d039e
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 6 deletions.
11 changes: 7 additions & 4 deletions lib/std/text/unicode.kk
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,11 @@ pub fun string/width( s : string ) : int {
//--------------------------------------------------------------

// These characters are considered wide, i.e. 2 columns wide.
// https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt
// https://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
// See the ranges whose East_Asian_Width field (after the `;`) is `W` (wide)
//
// Update with `python3 util/update-unicode.py -a`
// TODO: Handle 'unassigned' ranges: (Following is an excerpt from https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt )
// TODO: Handle 'unassigned' ranges: (Following is an excerpt from https://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt)
// - All code points, assigned or unassigned, that are not listed
// explicitly are given the value "N".
// - The unassigned code points in the following blocks default to "W":
Expand Down Expand Up @@ -181,7 +181,7 @@ val asian-wide : delayed<total,rtree> = delay{
single(0x2E80,0x2E99),
single(0x2E9B,0x2EF3),
single(0x2F00,0x2FD5),
single(0x2FF0,0x2FFB),
single(0x2FF0,0x2FFF),
single(0x3001,0x3003),
single(0x3004,0x3004),
single(0x3005,0x3005),
Expand Down Expand Up @@ -238,6 +238,7 @@ val asian-wide : delayed<total,rtree> = delay{
single(0x3196,0x319F),
single(0x31A0,0x31BF),
single(0x31C0,0x31E3),
single(0x31EF,0x31EF),
single(0x31F0,0x31FF),
single(0x3200,0x321E),
single(0x3220,0x3229),
Expand Down Expand Up @@ -383,7 +384,9 @@ val asian-wide : delayed<total,rtree> = delay{
single(0x2B820,0x2CEA1),
single(0x2CEA2,0x2CEAF),
single(0x2CEB0,0x2EBE0),
single(0x2EBE1,0x2F7FF),
single(0x2EBE1,0x2EBEF),
single(0x2EBF0,0x2EE5D),
single(0x2EE5E,0x2F7FF),
single(0x2F800,0x2FA1D),
single(0x2FA1E,0x2FA1F),
single(0x2FA20,0x2FFFD),
Expand Down
2 changes: 2 additions & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
# - support/vscode/koka.language-koka/package.json
# - whatsnew.md, readme.md

# Also update the Unicode East Asian wide ranges (`asian-wide`) in
# `lib/std/text/unicode.kk` using the output of `python3 util/update-unicode.py -a`

name: koka
version: 3.0.5
Expand Down
4 changes: 2 additions & 2 deletions util/update-unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
parser.add_argument("-a", "--asian_wide", default=False, action='store_true')
args = parser.parse_args()
if args.asian_wide:
result = requests.get("https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt")
result = requests.get("https://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt")
for line in result.text.split("\n"):
values = line.split(";")
if len(values) >= 2:
width = values[1].split("#")[0].strip()
if width == "W":
charrange = values[0].split("..")
charrange = values[0].strip().split("..")
if len(charrange) == 2:
print(f" single(0x{charrange[0]},0x{charrange[1]}),")
elif len(charrange) == 1:
Expand Down

0 comments on commit b2d039e

Please sign in to comment.