Skip to content

Commit

Permalink
data: update to UCD 16
Browse files Browse the repository at this point in the history
  • Loading branch information
BurntSushi committed Sep 29, 2024
1 parent dea2d34 commit c7d3666
Show file tree
Hide file tree
Showing 18 changed files with 2,149 additions and 625 deletions.
8 changes: 4 additions & 4 deletions regex-automata/src/nfa/thompson/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,15 +230,15 @@ impl Config {
/// # if cfg!(miri) { return Ok(()); } // miri takes too long
/// use regex_automata::nfa::thompson::NFA;
///
/// // 300KB isn't enough!
/// // 400KB isn't enough!
/// NFA::compiler()
/// .configure(NFA::config().nfa_size_limit(Some(300_000)))
/// .configure(NFA::config().nfa_size_limit(Some(400_000)))
/// .build(r"\w{20}")
/// .unwrap_err();
///
/// // ... but 400KB probably is.
/// // ... but 500KB probably is.
/// let nfa = NFA::compiler()
/// .configure(NFA::config().nfa_size_limit(Some(400_000)))
/// .configure(NFA::config().nfa_size_limit(Some(500_000)))
/// .build(r"\w{20}")?;
///
/// assert_eq!(nfa.pattern_len(), 1);
Expand Down
65 changes: 45 additions & 20 deletions regex-automata/src/util/unicode_data/perl_word.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
// ucd-generate perl-word tmp/ucd-15.0.0/ --chars
// ucd-generate perl-word ucd-16.0.0 --chars
//
// Unicode version: 15.0.0.
// Unicode version: 16.0.0.
//
// ucd-generate 0.2.15 is available on crates.io.
// ucd-generate 0.3.1 is available on crates.io.

pub const PERL_WORD: &'static [(char, char)] = &[
('0', '9'),
Expand Down Expand Up @@ -59,7 +59,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ࡠ', 'ࡪ'),
('ࡰ', 'ࢇ'),
('ࢉ', 'ࢎ'),
('\u{898}', '\u{8e1}'),
('\u{897}', '\u{8e1}'),
('\u{8e3}', '\u{963}'),
('०', '९'),
('ॱ', 'ঃ'),
Expand Down Expand Up @@ -158,8 +158,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ಪ', 'ಳ'),
('ವ', 'ಹ'),
('\u{cbc}', 'ೄ'),
('\u{cc6}', 'ೈ'),
('ೊ', '\u{ccd}'),
('\u{cc6}', '\u{cc8}'),
('\u{cca}', '\u{ccd}'),
('\u{cd5}', '\u{cd6}'),
('ೝ', 'ೞ'),
('ೠ', '\u{ce3}'),
Expand Down Expand Up @@ -243,8 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ᚁ', 'ᚚ'),
('ᚠ', 'ᛪ'),
('ᛮ', 'ᛸ'),
('ᜀ', '᜕'),
('ᜟ', '᜴'),
('ᜀ', '\u{1715}'),
('ᜟ', '\u{1734}'),
('ᝀ', '\u{1753}'),
('ᝠ', 'ᝬ'),
('ᝮ', 'ᝰ'),
Expand Down Expand Up @@ -276,11 +276,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('\u{1b00}', 'ᭌ'),
('᭐', '᭙'),
('\u{1b6b}', '\u{1b73}'),
('\u{1b80}', '᯳'),
('\u{1b80}', '\u{1bf3}'),
('ᰀ', '\u{1c37}'),
('᱀', '᱉'),
('ᱍ', 'ᱽ'),
('ᲀ', 'ᲈ'),
('ᲀ', 'ᲊ'),
('Ა', 'Ჺ'),
('Ჽ', 'Ჿ'),
('\u{1cd0}', '\u{1cd2}'),
Expand Down Expand Up @@ -367,10 +367,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('ꙿ', '\u{a6f1}'),
('ꜗ', 'ꜟ'),
('Ꜣ', 'ꞈ'),
('Ꞌ', 'ꟊ'),
('Ꞌ', 'ꟍ'),
('Ꟑ', 'ꟑ'),
('ꟓ', 'ꟓ'),
('ꟕ', 'ꟙ'),
('ꟕ', 'Ƛ'),
('ꟲ', 'ꠧ'),
('\u{a82c}', '\u{a82c}'),
('ꡀ', 'ꡳ'),
Expand All @@ -379,9 +379,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('\u{a8e0}', 'ꣷ'),
('ꣻ', 'ꣻ'),
('ꣽ', '\u{a92d}'),
('ꤰ', '꥓'),
('ꤰ', '\u{a953}'),
('ꥠ', 'ꥼ'),
('\u{a980}', '꧀'),
('\u{a980}', '\u{a9c0}'),
('ꧏ', '꧙'),
('ꧠ', 'ꧾ'),
('ꨀ', '\u{aa36}'),
Expand Down Expand Up @@ -468,6 +468,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𐖣', '𐖱'),
('𐖳', '𐖹'),
('𐖻', '𐖼'),
('𐗀', '𐗳'),
('𐘀', '𐜶'),
('𐝀', '𐝕'),
('𐝠', '𐝧'),
Expand Down Expand Up @@ -508,10 +509,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𐳀', '𐳲'),
('𐴀', '\u{10d27}'),
('𐴰', '𐴹'),
('𐵀', '𐵥'),
('\u{10d69}', '\u{10d6d}'),
('𐵯', '𐶅'),
('𐺀', '𐺩'),
('\u{10eab}', '\u{10eac}'),
('𐺰', '𐺱'),
('\u{10efd}', '𐼜'),
('𐻂', '𐻄'),
('\u{10efc}', '𐼜'),
('𐼧', '𐼧'),
('𐼰', '\u{10f50}'),
('𐽰', '\u{10f85}'),
Expand Down Expand Up @@ -551,12 +556,22 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𑌵', '𑌹'),
('\u{1133b}', '𑍄'),
('𑍇', '𑍈'),
('𑍋', '𑍍'),
('𑍋', '\u{1134d}'),
('𑍐', '𑍐'),
('\u{11357}', '\u{11357}'),
('𑍝', '𑍣'),
('\u{11366}', '\u{1136c}'),
('\u{11370}', '\u{11374}'),
('𑎀', '𑎉'),
('𑎋', '𑎋'),
('𑎎', '𑎎'),
('𑎐', '𑎵'),
('𑎷', '\u{113c0}'),
('\u{113c2}', '\u{113c2}'),
('\u{113c5}', '\u{113c5}'),
('\u{113c7}', '𑏊'),
('𑏌', '𑏓'),
('\u{113e1}', '\u{113e2}'),
('𑐀', '𑑊'),
('𑑐', '𑑙'),
('\u{1145e}', '𑑡'),
Expand All @@ -571,6 +586,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𑙐', '𑙙'),
('𑚀', '𑚸'),
('𑛀', '𑛉'),
('𑛐', '𑛣'),
('𑜀', '𑜚'),
('\u{1171d}', '\u{1172b}'),
('𑜰', '𑜹'),
Expand All @@ -594,6 +610,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𑩐', '\u{11a99}'),
('𑪝', '𑪝'),
('𑪰', '𑫸'),
('𑯀', '𑯠'),
('𑯰', '𑯹'),
('𑰀', '𑰈'),
('𑰊', '\u{11c36}'),
('\u{11c38}', '𑱀'),
Expand All @@ -618,15 +636,17 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('\u{11f00}', '𑼐'),
('𑼒', '\u{11f3a}'),
('𑼾', '\u{11f42}'),
('𑽐', '𑽙'),
('𑽐', '\u{11f5a}'),
('𑾰', '𑾰'),
('𒀀', '𒎙'),
('𒐀', '𒑮'),
('𒒀', '𒕃'),
('𒾐', '𒿰'),
('𓀀', '𓐯'),
('\u{13440}', '\u{13455}'),
('𓑠', '𔏺'),
('𔐀', '𔙆'),
('𖄀', '𖄹'),
('𖠀', '𖨸'),
('𖩀', '𖩞'),
('𖩠', '𖩩'),
Expand All @@ -639,16 +659,18 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𖭐', '𖭙'),
('𖭣', '𖭷'),
('𖭽', '𖮏'),
('𖵀', '𖵬'),
('𖵰', '𖵹'),
('𖹀', '𖹿'),
('𖼀', '𖽊'),
('\u{16f4f}', '𖾇'),
('\u{16f8f}', '𖾟'),
('𖿠', '𖿡'),
('𖿣', '\u{16fe4}'),
('𖿰', '𖿱'),
('\u{16ff0}', '\u{16ff1}'),
('𗀀', '𘟷'),
('𘠀', '𘳕'),
('𘴀', '𘴈'),
('𘳿', '𘴈'),
('𚿰', '𚿳'),
('𚿵', '𚿻'),
('𚿽', '𚿾'),
Expand All @@ -663,10 +685,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𛲀', '𛲈'),
('𛲐', '𛲙'),
('\u{1bc9d}', '\u{1bc9e}'),
('𜳰', '𜳹'),
('\u{1cf00}', '\u{1cf2d}'),
('\u{1cf30}', '\u{1cf46}'),
('\u{1d165}', '\u{1d169}'),
('𝅭', '\u{1d172}'),
('\u{1d16d}', '\u{1d172}'),
('\u{1d17b}', '\u{1d182}'),
('\u{1d185}', '\u{1d18b}'),
('\u{1d1aa}', '\u{1d1ad}'),
Expand Down Expand Up @@ -724,6 +747,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𞊐', '\u{1e2ae}'),
('𞋀', '𞋹'),
('𞓐', '𞓹'),
('𞗐', '𞗺'),
('𞟠', '𞟦'),
('𞟨', '𞟫'),
('𞟭', '𞟮'),
Expand Down Expand Up @@ -774,6 +798,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
('𫝀', '𫠝'),
('𫠠', '𬺡'),
('𬺰', '𮯠'),
('𮯰', '𮹝'),
('丽', '𪘀'),
('𰀀', '𱍊'),
('𱍐', '𲎯'),
Expand Down
21 changes: 21 additions & 0 deletions regex-syntax/src/hir/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3143,10 +3143,31 @@ mod tests {
#[cfg(feature = "unicode-script")]
assert_eq!(
t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
// Class({
// '·'..='·',
// '\u{300}'..='\u{301}',
// '\u{304}'..='\u{304}',
// '\u{306}'..='\u{306}',
// '\u{308}'..='\u{308}',
// '\u{313}'..='\u{313}',
// '\u{342}'..='\u{342}',
// '\u{345}'..='\u{345}',
// 'ʹ'..='ʹ',
// '\u{1dc0}'..='\u{1dc1}',
// '⁝'..='⁝',
// })
hir_uclass(&[
('·', '·'),
('\u{0300}', '\u{0301}'),
('\u{0304}', '\u{0304}'),
('\u{0306}', '\u{0306}'),
('\u{0308}', '\u{0308}'),
('\u{0313}', '\u{0313}'),
('\u{0342}', '\u{0342}'),
('\u{0345}', '\u{0345}'),
('ʹ', 'ʹ'),
('\u{1DC0}', '\u{1DC1}'),
('⁝', '⁝'),
])
);
assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
Expand Down
2 changes: 2 additions & 0 deletions regex-syntax/src/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,8 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> {
("V13_0", age::V13_0),
("V14_0", age::V14_0),
("V15_0", age::V15_0),
("V15_1", age::V15_1),
("V16_0", age::V16_0),
];
assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");

Expand Down
Loading

0 comments on commit c7d3666

Please sign in to comment.