From b4d986a3c9d805aa20a9667d805de9cccdd84905 Mon Sep 17 00:00:00 2001 From: Leonid Shevtsov Date: Fri, 23 Aug 2024 16:37:30 +0300 Subject: [PATCH] Improve parse_declarations! performance --- CHANGELOG.md | 1 + lib/css_parser/rule_set.rb | 37 +++++++++++++++++++++++++++---------- test/test_rule_set.rb | 13 +++++++++++++ 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 557e7eb..4f49b2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * RuleSet initialize now takes keyword argument, positional arguments are still supported but deprecated * Removed OffsetAwareRuleSet, it's a RuleSet with optional attributes filename and offset * Improved performance of block parsing by using StringScanner +* Improve `RuleSet#parse_declarations!` performance by using substring search instead of regexps ### Version v1.18.0 diff --git a/lib/css_parser/rule_set.rb b/lib/css_parser/rule_set.rb index 7f87816..82c7a99 100644 --- a/lib/css_parser/rule_set.rb +++ b/lib/css_parser/rule_set.rb @@ -26,6 +26,11 @@ class RuleSet WHITESPACE_REPLACEMENT = '___SPACE___' + # Tokens for parse_declarations! + COLON = ':'.freeze + SEMICOLON = ';'.freeze + LPAREN = '('.freeze + RPAREN = ')'.freeze class Declarations class Value attr_reader :value @@ -647,20 +652,32 @@ def parse_declarations!(block) # :nodoc: return unless block continuation = nil - block.split(/[;$]+/m).each do |decs| - decs = (continuation ? continuation + decs : decs) - if decs =~ /\([^)]*\Z/ # if it has an unmatched parenthesis - continuation = "#{decs};" - elsif (matches = decs.match(/\s*(.[^:]*)\s*:\s*(?m:(.+))(?:;?\s*\Z)/i)) - # skip end_of_declaration - property = matches[1] - value = matches[2] - add_declaration!(property, value) - continuation = nil + block.split(SEMICOLON) do |decs| + decs = (continuation ? 
"#{continuation};#{decs}" : decs) + if unmatched_open_parenthesis?(decs) + # Semicolon happened within parenthesis, so it is a part of the value + # the rest of the value is in the next segment + continuation = decs + next end + + next unless (colon = decs.index(COLON)) + + property = decs[0, colon] + value = decs[(colon + 1)..] + property.strip! + value.strip! + next if property.empty? || value.empty? + + add_declaration!(property, value) + continuation = nil end end + def unmatched_open_parenthesis?(declarations) + (lparen_index = declarations.rindex(LPAREN)) && !declarations.index(RPAREN, lparen_index) + end + #-- # TODO: way too simplistic #++ diff --git a/test/test_rule_set.rb b/test/test_rule_set.rb index e4bfd5b..d1bf9aa 100644 --- a/test/test_rule_set.rb +++ b/test/test_rule_set.rb @@ -78,6 +78,19 @@ def test_each_declaration_containing_semicolons assert_equal('no-repeat;', rs['background-repeat']) end + def test_each_declaration_with_newlines + expected = Set[ + {property: 'background-image', value: 'url(foo;bar)', is_important: false}, + {property: 'font-weight', value: 'bold', is_important: true}, + ] + rs = RuleSet.new(block: "background-image\n:\nurl(foo;bar);\n\n\n\n\n;;font-weight\n\n\n:bold\n\n\n!important") + actual = Set.new + rs.each_declaration do |prop, val, imp| + actual << {property: prop, value: val, is_important: imp} + end + assert_equal(expected, actual) + end + def test_selector_sanitization selectors = "h1, h2,\nh3 " rs = RuleSet.new(selectors: selectors, block: "color: #fff;")