automata: fix more out-dated regex-cli commands

That should cover all of them. Closes #1053
rust-lang · Oct 6, 2023 · 3f36a63 · 3f36a63
1 parent 4e873e2
commit 3f36a63
Show file tree

Hide file tree

Showing 10 changed files with 30 additions and 26 deletions.
diff --git a/regex-automata/src/dfa/accel.rs b/regex-automata/src/dfa/accel.rs
@@ -6,15 +6,16 @@
 // non-Unicode regexes. For example, consider '(?-u)[^a]+a'. We can look at its
 // DFA with regex-cli:
 //
-//     $ regex-cli debug dfa dense '(?-u)[^a]+a' -BbC
-//     dense::DFA(
+//     $ regex-cli debug dense dfa -p '(?-u)[^a]+a' -BbC --no-table
 //     D 000000:
 //     Q 000001:
 //      *000002:
-//     A 000003: \x00-` => 3, a => 5, b-\xFF => 3
-//      >000004: \x00-` => 3, a => 4, b-\xFF => 3
-//       000005: \x00-\xFF => 2, EOI => 2
-//     )
+//     A 000003: \x00-` => 3, a => 8, b-\xFF => 3
+//     A 000004: \x00-` => 4, a => 7, b-\xFF => 4
+//       000005: \x00-` => 4, b-\xFF => 4
+//       000006: \x00-` => 3, a => 6, b-\xFF => 3
+//       000007: \x00-\xFF => 2, EOI => 2
+//       000008: \x00-\xFF => 2, EOI => 2
 //
 // In particular, state 3 is accelerated (shown via the 'A' indicator) since
 // the only way to leave that state once entered is to see an 'a' byte. If

diff --git a/regex-automata/src/dfa/automaton.rs b/regex-automata/src/dfa/automaton.rs
@@ -1132,7 +1132,7 @@ pub unsafe trait Automaton {
     /// // implementation defined.
     /// //
     /// // N.B. We get '3' by inspecting the state machine using 'regex-cli'.
-    /// // e.g., try `regex-cli debug dfa dense '[^abc]+a' -BbUC`.
+    /// // e.g., try `regex-cli debug dense dfa -p '[^abc]+a' -BbUC`.
     /// let id = StateID::new(3 * dfa.stride()).unwrap();
     /// let accelerator = dfa.accelerator(id);
     /// // The `[^abc]+` sub-expression permits [a, b, c] to be accelerated.

diff --git a/regex-automata/src/dfa/dense.rs b/regex-automata/src/dfa/dense.rs
@@ -1228,8 +1228,9 @@ impl Builder {
         } else {
             let mut set = nfa.byte_class_set().clone();
             // It is important to distinguish any "quit" bytes from all other
-            // bytes. Otherwise, a non-quit byte may end up in the same class
-            // as a quit byte, and thus cause the DFA stop when it shouldn't.
+            // bytes. Otherwise, a non-quit byte may end up in the same
+            // class as a quit byte, and thus cause the DFA to stop when it
+            // shouldn't.
             //
             // Test case:
             //

diff --git a/regex-automata/src/hybrid/dfa.rs b/regex-automata/src/hybrid/dfa.rs
@@ -2103,8 +2103,10 @@ impl<'i, 'c> Lazy<'i, 'c> {
     /// Here's an example that justifies 'inline(never)'
     ///
     /// ```ignore
-    /// regex-cli find hybrid dfa \
-    ///   @all-codepoints-utf8-100x '\pL{100}' --cache-capacity 10000000
+    /// regex-cli find match hybrid \
+    ///   --cache-capacity 100000000 \
+    ///   -p '\pL{100}' \
+    ///   all-codepoints-utf8-100x
     /// ```
     ///
     /// Where 'all-codepoints-utf8-100x' is the UTF-8 encoding of every
@@ -3830,8 +3832,8 @@ impl Config {
             //
             // Test case:
             //
-            //   regex-cli find hybrid regex -w @conn.json.1000x.log \
-            //     '^#' '\b10\.55\.182\.100\b'
+            //   regex-cli find match hybrid --unicode-word-boundary \
+            //     -p '^#' -p '\b10\.55\.182\.100\b' conn.json.1000x.log
             if !quit.is_empty() {
                 set.add_set(&quit);
             }

diff --git a/regex-automata/src/hybrid/search.rs b/regex-automata/src/hybrid/search.rs
@@ -105,14 +105,14 @@ fn find_fwd_imp(
             // PERF: For justification of omitting bounds checks, it gives us a
             // ~10% bump in search time. This was used for a benchmark:
             //
-            //     regex-cli find hybrid dfa @bigfile '(?m)^.+$' -UBb
+            //     regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
             //
             // PERF: For justification for the loop unrolling, we use a few
             // different tests:
             //
-            //     regex-cli find hybrid dfa @$bigfile '\w{50}' -UBb
-            //     regex-cli find hybrid dfa @$bigfile '(?m)^.+$' -UBb
-            //     regex-cli find hybrid dfa @$bigfile 'ZQZQZQZQ' -UBb
+            //     regex-cli find half hybrid -p '\w{50}' -UBb bigfile
+            //     regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile
+            //     regex-cli find half hybrid -p 'ZQZQZQZQ' -UBb bigfile
             //
             // And there are three different configurations:
             //
@@ -353,7 +353,7 @@ fn find_rev_imp(
             // anchored and on shorter haystacks. However, this still makes a
             // difference. Take this command for example:
             //
-            //     regex-cli find hybrid regex @$bigfile '(?m)^.+$' -UBb
+            //     regex-cli find match hybrid -p '(?m)^.+$' -UBb bigfile
             //
             // (Notice that we use 'find match hybrid', not 'find half hybrid'
             // like in the justification for the forward direction. The 'regex'

diff --git a/regex-automata/src/nfa/thompson/compiler.rs b/regex-automata/src/nfa/thompson/compiler.rs
@@ -1466,7 +1466,7 @@ impl Compiler {
         // compare and contrast performance of the Pike VM when the code below
         // is active vs the code above. Here's an example to try:
         //
-        //   regex-cli find match pikevm -b -p '(?m)^\w{20}' -y '@$smallishru'
+        //   regex-cli find match pikevm -b -p '(?m)^\w{20}' non-ascii-file
         //
         // With Unicode classes generated below, this search takes about 45s on
         // my machine. But with the compressed version above, the search takes

diff --git a/regex-automata/src/nfa/thompson/map.rs b/regex-automata/src/nfa/thompson/map.rs
@@ -65,7 +65,7 @@ const INIT: u64 = 14695981039346656037;
 /// Specifically, one could observe the difference with std's hashmap via
 /// something like the following benchmark:
 ///
-///   hyperfine "regex-cli debug nfa thompson --quiet --reverse '\w{90} ecurB'"
+///   hyperfine "regex-cli debug thompson -qr --no-captures '\w{90} ecurB'"
 ///
 /// But to observe that difference, you'd have to modify the code to use
 /// std's hashmap.

diff --git a/regex-automata/src/nfa/thompson/nfa.rs b/regex-automata/src/nfa/thompson/nfa.rs
@@ -1841,14 +1841,12 @@ impl SparseTransitions {
         // This is an alternative implementation that uses binary search. In
         // some ad hoc experiments, like
         //
-        //   smallishru=OpenSubtitles2018.raw.sample.smallish.ru
-        //   regex-cli find nfa thompson pikevm -b "@$smallishru" '\b\w+\b'
+        //   regex-cli find match pikevm -b -p '\b\w+\b' non-ascii-file
         //
         // I could not observe any improvement, and in fact, things seemed to
         // be a bit slower. I can see an improvement in at least one benchmark:
         //
-        //   allcpssmall=all-codepoints-utf8-10x
-        //   regex-cli find nfa thompson pikevm @$allcpssmall '\pL{100}'
+        //   regex-cli find match pikevm -b -p '\pL{100}' all-codepoints-utf8
         //
         // Where total search time goes from 3.2s to 2.4s when using binary
         // search.

diff --git a/regex-automata/src/nfa/thompson/range_trie.rs b/regex-automata/src/nfa/thompson/range_trie.rs
@@ -594,7 +594,7 @@ impl State {
         // Benchmarks suggest that binary search is just a bit faster than
         // straight linear search. Specifically when using the debug tool:
         //
-        //   hyperfine "regex-cli debug nfa thompson --quiet --reverse '\w{90} ecurB'"
+        //   hyperfine "regex-cli debug thompson -qr --no-captures '\w{90} ecurB'"
         binary_search(&self.transitions, |t| range.start <= t.range.end)
     }
 

diff --git a/regex-automata/src/util/look.rs b/regex-automata/src/util/look.rs
@@ -1024,7 +1024,9 @@ impl core::fmt::Display for UnicodeWordBoundaryError {
 // There are perhaps other choices as well. Why did I stop at these 4? Because
 // I wanted to preserve my sanity. I suspect I'll wind up adding the lazy DFA
 // approach eventually, as the benefits of the DFA approach are somewhat
-// compelling. The 'boundary-words-holmes' benchmark tests this:
+// compelling. The 'boundary-words-holmes' benchmark tests this. (Note that
+// the commands below no longer work. If necessary, we should re-create
+// the benchmark from whole cloth in rebar.)
 //
 //   $ regex-cli bench measure -f boundary-words-holmes -e pikevm > dfa.csv
 //