Why doesn't `\x89` (for example) match the literal byte value `0x89`?
#1054
-
use regex::bytes::Regex;
fn main() {
let data = b"\xab\x00\x00\x08\x76\x89\x12";
let re = Regex::new(r"\xab\x00.{2}").unwrap();
let r = re.find(data);
println!("{:?}", r);
let re = Regex::new(r"\xab\x00..").unwrap();
let r = re.find(data);
println!("{:?}", r);
let re = Regex::new(r"\xab\x00.").unwrap();
let r = re.find(data);
println!("{:?}", r);
let re = Regex::new(r"\xab\x00\x00").unwrap();
let r = re.find(data);
println!("{:?}", r);
let re = Regex::new(r"\xab").unwrap();
let r = re.find(data);
println!("{:?}", r);
println!("Hello, world!");
}
Result:
None
None
None
None
None
Hello, world!
Meanwhile, in Python:
import re
d = b"\xab\x00\x00\x08\x76\x89\x12"
r = re.compile(rb"\xab\x00.{2}")
print(r.findall(d))
r = re.compile(rb"\xab\x00..")
print(r.findall(d))
r = re.compile(rb"\xab\x00.")
print(r.findall(d))
r = re.compile(rb"\xab\x00")
print(r.findall(d))
r = re.compile(rb"\xab")
print(r.findall(d))
Output:
[b'\xab\x00\x00\x08']
[b'\xab\x00\x00\x08']
[b'\xab\x00\x00']
[b'\xab\x00']
[b'\xab']
Is there a particular flag I need to enable using RegexBuilder? Or is this intentional? Or is it a bug?
Beta Was this translation helpful? Give feedback.
Replies: 2 comments
-
You need to disable Unicode mode. That can be done with `(?-u)` in the pattern itself:
use regex::bytes::Regex;
fn main() {
let data = b"\xab\x00\x00\x08\x76\x89\x12";
let re = Regex::new(r"(?-u)\xab\x00.{2}").unwrap();
let r = re.find(data);
println!("{:?}", r);
let re = Regex::new(r"(?-u)\xab\x00..").unwrap();
let r = re.find(data);
println!("{:?}", r);
let re = Regex::new(r"(?-u)\xab\x00.").unwrap();
let r = re.find(data);
println!("{:?}", r);
let re = Regex::new(r"(?-u)\xab\x00\x00").unwrap();
let r = re.find(data);
println!("{:?}", r);
let re = Regex::new(r"(?-u)\xab").unwrap();
let r = re.find(data);
println!("{:?}", r);
println!("Hello, world!");
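}
Or via `RegexBuilder::unicode(false)`. When Unicode mode is enabled, hex escapes always refer to the Unicode codepoint. So `\xab` matches the codepoint U+00AB (encoded in UTF-8 as the two bytes `\xC2\xAB`), not the single byte `0xAB`. This regex crate is strictly more flexible than what you can do in Python. See: https://github.com/BurntSushi/rebar/tree/0203f1af6bb1d19071658c74e7541bcc3ad30a53/engines/python#unicode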
Beta Was this translation helpful? Give feedback.
-
Thanks for your reply, awesome mate!
Beta Was this translation helpful? Give feedback.
You need to disable Unicode mode. That can be done with `(?-u)` in the pattern itself: