From b421a1ca4a74612577451a0dd1db97ddcdba9d73 Mon Sep 17 00:00:00 2001
From: Charlie Marsh <charlie.r.marsh@gmail.com>
Date: Wed, 23 Aug 2023 00:23:05 -0400
Subject: [PATCH] Remove lexing for colon-matching use cases

---
 .../src/rules/flake8_simplify/rules/ast_if.rs | 22 +++++-----
 .../rules/flake8_simplify/rules/ast_with.rs   | 32 ++++++---------
 crates/ruff_python_parser/src/lib.rs          | 40 +++----------------
 3 files changed, 27 insertions(+), 67 deletions(-)

diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs
index c2bf0b629ab58..27d1b551421c8 100644
--- a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs
+++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs
@@ -11,8 +11,8 @@ use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt};
 use ruff_python_ast::helpers::{any_over_expr, contains_effect};
 use ruff_python_ast::stmt_if::{if_elif_branches, IfElifBranch};
-use ruff_python_parser::first_colon_range;
 use ruff_python_semantic::SemanticModel;
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
 use ruff_source_file::{Locator, UniversalNewlines};
 
 use crate::checkers::ast::Checker;
@@ -369,16 +369,10 @@ pub(crate) fn nested_if_statements(
     };
 
     // Find the deepest nested if-statement, to inform the range.
-    let Some((test, first_stmt)) = find_last_nested_if(body) else {
+    let Some((test, _first_stmt)) = find_last_nested_if(body) else {
         return;
     };
 
-    let colon = first_colon_range(
-        TextRange::new(test.end(), first_stmt.start()),
-        checker.locator().contents(),
-        checker.source_type.is_jupyter(),
-    );
-
     // Check if the parent is already emitting a larger diagnostic including this if statement
     if let Some(Stmt::If(stmt_if)) = parent {
         if let Some((body, _range, _is_elif)) = nested_if_body(stmt_if) {
@@ -392,10 +386,14 @@ pub(crate) fn nested_if_statements(
         }
     }
 
-    let mut diagnostic = Diagnostic::new(
-        CollapsibleIf,
-        colon.map_or(range, |colon| TextRange::new(range.start(), colon.end())),
-    );
+    let Some(colon) = SimpleTokenizer::starts_at(test.end(), checker.locator().contents())
+        .skip_trivia()
+        .find(|token| token.kind == SimpleTokenKind::Colon)
+    else {
+        return;
+    };
+
+    let mut diagnostic = Diagnostic::new(CollapsibleIf, TextRange::new(range.start(), colon.end()));
     if checker.patch(diagnostic.kind.rule()) {
         // The fixer preserves comments in the nested body, but removes comments between
         // the outer and inner if statements.
diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs
index 528b82e050065..95fe3d66bebef 100644
--- a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs
+++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs
@@ -1,12 +1,12 @@
 use log::error;
-use ruff_python_ast::{self as ast, Ranged, Stmt, WithItem};
-use ruff_text_size::TextRange;
 
 use ruff_diagnostics::{AutofixKind, Violation};
 use ruff_diagnostics::{Diagnostic, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_parser::first_colon_range;
+use ruff_python_ast::{self as ast, Ranged, Stmt, WithItem};
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
 use ruff_source_file::UniversalNewlines;
+use ruff_text_size::TextRange;
 
 use crate::checkers::ast::Checker;
 use crate::line_width::LineWidth;
@@ -106,32 +106,24 @@ pub(crate) fn multiple_with_statements(
         }
     }
 
-    if let Some((is_async, items, body)) = next_with(&with_stmt.body) {
+    if let Some((is_async, items, _body)) = next_with(&with_stmt.body) {
         if is_async != with_stmt.is_async {
             // One of the statements is an async with, while the other is not,
             // we can't merge those statements.
             return;
         }
 
-        let last_item = items.last().expect("Expected items to be non-empty");
-        let colon = first_colon_range(
-            TextRange::new(
-                last_item
-                    .optional_vars
-                    .as_ref()
-                    .map_or(last_item.context_expr.end(), |v| v.end()),
-                body.first().expect("Expected body to be non-empty").start(),
-            ),
-            checker.locator().contents(),
-            checker.source_type.is_jupyter(),
-        );
+        let Some(colon) = items.last().and_then(|item| {
+            SimpleTokenizer::starts_at(item.end(), checker.locator().contents())
+                .skip_trivia()
+                .find(|token| token.kind == SimpleTokenKind::Colon)
+        }) else {
+            return;
+        };
 
         let mut diagnostic = Diagnostic::new(
             MultipleWithStatements,
-            colon.map_or_else(
-                || with_stmt.range(),
-                |colon| TextRange::new(with_stmt.start(), colon.end()),
-            ),
+            TextRange::new(with_stmt.start(), colon.end()),
         );
         if checker.patch(diagnostic.kind.rule()) {
             if !checker
diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs
index 0e0d607a12768..dc9396393e7ee 100644
--- a/crates/ruff_python_parser/src/lib.rs
+++ b/crates/ruff_python_parser/src/lib.rs
@@ -109,7 +109,6 @@
 //! [parsing]: https://en.wikipedia.org/wiki/Parsing
 //! [lexer]: crate::lexer
 
-use crate::lexer::LexResult;
 pub use parser::{
     parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at,
     parse_suite, parse_tokens, ParseError, ParseErrorType,
@@ -119,6 +118,8 @@ use ruff_text_size::{TextRange, TextSize};
 pub use string::FStringErrorType;
 pub use token::{StringKind, Tok, TokenKind};
 
+use crate::lexer::LexResult;
+
 mod function;
 // Skip flattening lexer to distinguish from full ruff_python_parser
 mod context;
@@ -159,25 +160,6 @@ pub fn parse_program_tokens(
     }
 }
 
-/// Return the `Range` of the first `Tok::Colon` token in a `Range`.
-pub fn first_colon_range(
-    range: TextRange,
-    source: &str,
-    is_jupyter_notebook: bool,
-) -> Option<TextRange> {
-    let contents = &source[range];
-    let mode = if is_jupyter_notebook {
-        Mode::Jupyter
-    } else {
-        Mode::Module
-    };
-    let range = lexer::lex_starts_at(contents, mode, range.start())
-        .flatten()
-        .find(|(tok, _)| tok.is_colon())
-        .map(|(_, range)| range);
-    range
-}
-
 /// Extract all [`CmpOp`] operators from an expression snippet, with appropriate
 /// ranges.
 ///
@@ -373,24 +355,12 @@ mod python {
 
 #[cfg(test)]
 mod tests {
-    use crate::{first_colon_range, locate_cmp_ops, parse_expression, LocatedCmpOp};
     use anyhow::Result;
-    use ruff_python_ast::CmpOp;
 
-    use ruff_text_size::{TextLen, TextRange, TextSize};
+    use ruff_python_ast::CmpOp;
+    use ruff_text_size::TextSize;
 
-    #[test]
-    fn extract_first_colon_range() {
-        let contents = "with a: pass";
-        let range = first_colon_range(
-            TextRange::new(TextSize::from(0), contents.text_len()),
-            contents,
-            false,
-        )
-        .unwrap();
-        assert_eq!(&contents[range], ":");
-        assert_eq!(range, TextRange::new(TextSize::from(6), TextSize::from(7)));
-    }
+    use crate::{locate_cmp_ops, parse_expression, LocatedCmpOp};
 
     #[test]
     fn extract_cmp_op_location() -> Result<()> {