Skip to content

Commit

Permalink
Merge pull request #167 from railsware/use-stringscanner-for-parse-block
Browse files Browse the repository at this point in the history
Use StringScanner instead of String#scan to improve block parsing performance
  • Loading branch information
grosser authored Jul 29, 2024
2 parents 625d433 + 98e1381 commit 4fc5b07
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* Deprecate `add_rule!` (positional arguments) and `add_rule_with_offsets!` in favor of `add_rule!` (keyword arguments)
* RuleSet initialize now takes keyword arguments; positional arguments are still supported but deprecated
* Removed OffsetAwareRuleSet, it's a RuleSet with optional attributes filename and offset
* Improved performance of block parsing by using StringScanner

### Version v1.18.0

Expand Down
19 changes: 13 additions & 6 deletions lib/css_parser/parser.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require 'strscan'

module CssParser
# Exception class used for any errors encountered while downloading remote files.
class RemoteFileError < IOError; end
Expand All @@ -17,6 +19,7 @@ class CircularReferenceError < StandardError; end
# [<tt>io_exceptions</tt>] Throw an exception if a link can not be found. Boolean, default is <tt>true</tt>.
class Parser
USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (https://github.com/premailer/css_parser)".freeze
RULESET_TOKENIZER_RX = /\s+|\\{2,}|\\?[{}\s"]|[()]|.[^\s"{}()\\]*/.freeze
STRIP_CSS_COMMENTS_RX = %r{/\*.*?\*/}m.freeze
STRIP_HTML_COMMENTS_RX = /<!--|-->/m.freeze

Expand Down Expand Up @@ -362,11 +365,15 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:

# once we are in a rule, we will use this to store where we started if we are capturing offsets
rule_start = nil
offset = nil
start_offset = nil
end_offset = nil

block.scan(/\s+|\\{2,}|\\?[{}\s"]|[()]|.[^\s"{}()\\]*/) do |token|
scanner = StringScanner.new(block)
until scanner.eos?
# save the regex offset so that we know where in the file we are
offset = Regexp.last_match.offset(0) if options[:capture_offsets]
start_offset = scanner.pos
token = scanner.scan(RULESET_TOKENIZER_RX)
end_offset = scanner.pos

if token.start_with?('"') # found un-escaped double quote
in_string = !in_string
Expand Down Expand Up @@ -398,7 +405,7 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
media_types: current_media_queries
}
if options[:capture_offsets]
add_rule_options.merge!(filename: options[:filename], offset: rule_start..offset.last)
add_rule_options.merge!(filename: options[:filename], offset: rule_start..end_offset)
end
add_rule!(**add_rule_options)
end
Expand Down Expand Up @@ -459,7 +466,7 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
current_selectors << token

# mark this as the beginning of the selector unless we have already marked it
rule_start = offset.first if options[:capture_offsets] && rule_start.nil? && token =~ /^[^\s]+$/
rule_start = start_offset if options[:capture_offsets] && rule_start.nil? && token =~ /^[^\s]+$/
end
end

Expand All @@ -471,7 +478,7 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
media_types: current_media_queries
}
if options[:capture_offsets]
add_rule_options.merge!(filename: options[:filename], offset: rule_start..offset.last)
add_rule_options.merge!(filename: options[:filename], offset: rule_start..end_offset)
end
add_rule!(**add_rule_options)
end
Expand Down

0 comments on commit 4fc5b07

Please sign in to comment.