From 3ca204bb18c4a20bcb4402c22ddb8c6241c906f3 Mon Sep 17 00:00:00 2001
From: Ben Barham <ben_barham@apple.com>
Date: Thu, 9 Jan 2025 20:32:55 -0800
Subject: [PATCH 1/2] Optimize pretty printing performance

swift-format#883 fixed outputting incorrect line numbers, but introduced
a performance regression. swift-format#901 improved this back to around
the original, but had to be reverted as it introduced an issue due to
counting codepoints rather than characters.

Introduce a similar optimization again, but only for the first portion
of the string (prior to the last newline).

Fixes swift-format#894 again.
---
 .../PrettyPrint/PrettyPrintBuffer.swift       | 25 ++++++++------
 .../PrettyPrint/LineNumbersTests.swift        | 34 +++++++++++++++++++
 2 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/Sources/SwiftFormat/PrettyPrint/PrettyPrintBuffer.swift b/Sources/SwiftFormat/PrettyPrint/PrettyPrintBuffer.swift
index 4b02d372..9ea72620 100644
--- a/Sources/SwiftFormat/PrettyPrint/PrettyPrintBuffer.swift
+++ b/Sources/SwiftFormat/PrettyPrint/PrettyPrintBuffer.swift
@@ -119,18 +119,21 @@ struct PrettyPrintBuffer {
     consecutiveNewlineCount = 0
     pendingSpaces = 0
 
-    // In case of comments, we may get a multi-line string.
-    // To account for that case, we need to correct the lineNumber count.
-    // The new column is only the position within the last line.
-    let lines = text.split(separator: "\n")
-    lineNumber += lines.count - 1
-    if lines.count > 1 {
-      // in case we have inserted new lines, we need to reset the column
-      column = lines.last?.count ?? 0
+    // In case of comments, we may get a multi-line string. To account for that case, we need to correct the
+    // `lineNumber` count. The new `column` is the position within the last line.
+
+    var lastNewlineIndex: String.Index? = nil
+    for i in text.utf8.indices {
+      if text.utf8[i] == UInt8(ascii: "\n") {
+        lastNewlineIndex = i
+        lineNumber += 1
+      }
+    }
+
+    if let lastNewlineIndex {
+      column = text.distance(from: text.utf8.index(after: lastNewlineIndex), to: text.endIndex)
     } else {
-      // in case it is an end of line comment or a single line comment,
-      // we just add to the current column
-      column += lines.last?.count ?? 0
+      column += text.count
     }
   }
 
diff --git a/Tests/SwiftFormatTests/PrettyPrint/LineNumbersTests.swift b/Tests/SwiftFormatTests/PrettyPrint/LineNumbersTests.swift
index 1bb58f7a..c377e7ca 100644
--- a/Tests/SwiftFormatTests/PrettyPrint/LineNumbersTests.swift
+++ b/Tests/SwiftFormatTests/PrettyPrint/LineNumbersTests.swift
@@ -81,4 +81,38 @@ final class LineNumbersTests: PrettyPrintTestCase {
       ]
     )
   }
+
+  func testCharacterVsCodepoint() {
+    let input =
+      """
+      let fo = 1  // 🤥
+
+      """
+
+    assertPrettyPrintEqual(
+      input: input,
+      expected: input,
+      linelength: 16,
+      whitespaceOnly: true,
+      findings: []
+    )
+  }
+
+  func testCharacterVsCodepointMultiline() {
+    let input =
+      #"""
+      /// This is a multiline
+      /// comment that is in 🤥
+      /// fact perfectly sized
+
+      """#
+
+    assertPrettyPrintEqual(
+      input: input,
+      expected: input,
+      linelength: 25,
+      whitespaceOnly: true,
+      findings: []
+    )
+  }
 }

From 83b5f010d250e3b4d3009562ed55414766201514 Mon Sep 17 00:00:00 2001
From: Rintaro Ishizaki <rishizaki@apple.com>
Date: Mon, 13 Jan 2025 10:06:44 -0800
Subject: [PATCH 2/2] [WhitespaceLinter] Use hand crafted "is whitespace"
 function

* `UnicodeScalar(_:)` on an arbitrary UTF-8 code unit was wrong. It only
  works correctly if the code unit is < 0x80
* `UnicodeScalar.Properties.isWhitespace` is slow. Profiling 'lint'
  shows it's taking 13.6% of the entire time
* Whitespaces in Unicode "Basic Latin" block are well defined, there's
  no need to consult `UnicodeScalar.Properties`
---
 .../SwiftFormat/PrettyPrint/WhitespaceLinter.swift    | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift b/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift
index c5c5e2ae..30f73395 100644
--- a/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift
+++ b/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift
@@ -339,9 +339,16 @@ public class WhitespaceLinter {
     startingAt offset: Int,
     in data: [UTF8.CodeUnit]
   ) -> ArraySlice<UTF8.CodeUnit> {
+    func isWhitespace(_ char: UTF8.CodeUnit) -> Bool {
+      switch char {
+      case UInt8(ascii: " "), UInt8(ascii: "\n"), UInt8(ascii: "\t"), UInt8(ascii: "\r"), /*VT*/ 0x0B, /*FF*/ 0x0C:
+        return true
+      default:
+        return false
+      }
+    }
     guard
-      let whitespaceEnd =
-        data[offset...].firstIndex(where: { !UnicodeScalar($0).properties.isWhitespace })
+      let whitespaceEnd = data[offset...].firstIndex(where: { !isWhitespace($0) })
     else {
       return data[offset..<data.endIndex]
     }