From 3ca204bb18c4a20bcb4402c22ddb8c6241c906f3 Mon Sep 17 00:00:00 2001 From: Ben Barham Date: Thu, 9 Jan 2025 20:32:55 -0800 Subject: [PATCH 1/2] Optimize pretty printing performance swift-format#883 fixed outputting incorrect line numbers, but introduced a performance regression. swift-format#901 improved this back to around the original, but had to be reverted as it introduced a an issue due to counting codepoints rather than characters. Introduce a similar optimization again, but only for the first portion of the string (prior to the last newline). Fixes swift-format#894 again. --- .../PrettyPrint/PrettyPrintBuffer.swift | 25 ++++++++------ .../PrettyPrint/LineNumbersTests.swift | 34 +++++++++++++++++++ 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/Sources/SwiftFormat/PrettyPrint/PrettyPrintBuffer.swift b/Sources/SwiftFormat/PrettyPrint/PrettyPrintBuffer.swift index 4b02d372..9ea72620 100644 --- a/Sources/SwiftFormat/PrettyPrint/PrettyPrintBuffer.swift +++ b/Sources/SwiftFormat/PrettyPrint/PrettyPrintBuffer.swift @@ -119,18 +119,21 @@ struct PrettyPrintBuffer { consecutiveNewlineCount = 0 pendingSpaces = 0 - // In case of comments, we may get a multi-line string. - // To account for that case, we need to correct the lineNumber count. - // The new column is only the position within the last line. - let lines = text.split(separator: "\n") - lineNumber += lines.count - 1 - if lines.count > 1 { - // in case we have inserted new lines, we need to reset the column - column = lines.last?.count ?? 0 + // In case of comments, we may get a multi-line string. To account for that case, we need to correct the + // `lineNumber` count. The new `column` is the position within the last line. + + var lastNewlineIndex: String.Index? = nil + for i in text.utf8.indices { + if text.utf8[i] == UInt8(ascii: "\n") { + lastNewlineIndex = i + lineNumber += 1 + } + } + + if let lastNewlineIndex { + column = text.distance(from: text.utf8.index(after: lastNewlineIndex), to: text.endIndex) } else { - // in case it is an end of line comment or a single line comment, - // we just add to the current column - column += lines.last?.count ?? 0 + column += text.count } } diff --git a/Tests/SwiftFormatTests/PrettyPrint/LineNumbersTests.swift b/Tests/SwiftFormatTests/PrettyPrint/LineNumbersTests.swift index 1bb58f7a..c377e7ca 100644 --- a/Tests/SwiftFormatTests/PrettyPrint/LineNumbersTests.swift +++ b/Tests/SwiftFormatTests/PrettyPrint/LineNumbersTests.swift @@ -81,4 +81,38 @@ final class LineNumbersTests: PrettyPrintTestCase { ] ) } + + func testCharacterVsCodepoint() { + let input = + """ + let fo = 1 // 🤥 + + """ + + assertPrettyPrintEqual( + input: input, + expected: input, + linelength: 16, + whitespaceOnly: true, + findings: [] + ) + } + + func testCharacterVsCodepointMultiline() { + let input = + #""" + /// This is a multiline + /// comment that is in 🤥 + /// fact perfectly sized + + """# + + assertPrettyPrintEqual( + input: input, + expected: input, + linelength: 25, + whitespaceOnly: true, + findings: [] + ) + } } From 83b5f010d250e3b4d3009562ed55414766201514 Mon Sep 17 00:00:00 2001 From: Rintaro Ishizaki Date: Mon, 13 Jan 2025 10:06:44 -0800 Subject: [PATCH 2/2] [WhitespaceLinter] Use hand crafted "is whitespace" function * `UnicodeScalar(_:)` on arbitrary UTF8 code point was wrong. It only works correctly if the code point is < 0x80 * `UnicodeScalar.Properties.isWhitespace` is slow. Profiling 'lint' shows it's taking 13.6 of the entire time * Whitespaces in Unicode "Basic Latin" block are well defined, there's no need to consult `UnicodeScalar.Properties` --- .../SwiftFormat/PrettyPrint/WhitespaceLinter.swift | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift b/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift index c5c5e2ae..30f73395 100644 --- a/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift +++ b/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift @@ -339,9 +339,16 @@ public class WhitespaceLinter { startingAt offset: Int, in data: [UTF8.CodeUnit] ) -> ArraySlice { + func isWhitespace(_ char: UTF8.CodeUnit) -> Bool { + switch char { + case UInt8(ascii: " "), UInt8(ascii: "\n"), UInt8(ascii: "\t"), UInt8(ascii: "\r"), /*VT*/ 0x0B, /*FF*/ 0x0C: + return true + default: + return false + } + } guard - let whitespaceEnd = - data[offset...].firstIndex(where: { !UnicodeScalar($0).properties.isWhitespace }) + let whitespaceEnd = data[offset...].firstIndex(where: { !isWhitespace($0) }) else { return data[offset..