diff --git a/Gemfile.lock b/Gemfile.lock
index 2de758d..e53e5bf 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -3,6 +3,7 @@ PATH
specs:
css_parser (1.18.0)
addressable
+ crass (~> 1.0)
GEM
remote: https://rubygems.org/
@@ -12,6 +13,7 @@ GEM
ast (2.4.2)
benchmark-ips (2.13.0)
bump (0.10.0)
+ crass (1.0.6)
debug (1.9.2)
irb (~> 1.10)
reline (>= 0.3.8)
@@ -68,8 +70,8 @@ GEM
webrick (1.8.1)
PLATFORMS
+ arm64-darwin-22
java
- ruby
DEPENDENCIES
benchmark-ips
diff --git a/css_parser.gemspec b/css_parser.gemspec
index d8cf155..d02094d 100644
--- a/css_parser.gemspec
+++ b/css_parser.gemspec
@@ -18,5 +18,6 @@ Gem::Specification.new name, CssParser::VERSION do |s|
s.metadata['bug_tracker_uri'] = 'https://github.com/premailer/css_parser/issues'
s.metadata['rubygems_mfa_required'] = 'true'
- s.add_runtime_dependency 'addressable'
+ s.add_dependency 'addressable'
+ s.add_dependency 'crass', '~> 1.0'
end
diff --git a/lib/css_parser.rb b/lib/css_parser.rb
index 37426c2..8d71587 100644
--- a/lib/css_parser.rb
+++ b/lib/css_parser.rb
@@ -7,6 +7,7 @@
require 'zlib'
require 'stringio'
require 'iconv' unless String.method_defined?(:encode)
+require 'crass'
require 'css_parser/version'
require 'css_parser/rule_set'
diff --git a/lib/css_parser/parser.rb b/lib/css_parser/parser.rb
index 4e42149..e400726 100644
--- a/lib/css_parser/parser.rb
+++ b/lib/css_parser/parser.rb
@@ -17,12 +17,6 @@ class CircularReferenceError < StandardError; end
# [io_exceptions] Throw an exception if a link can not be found. Boolean, default is true.
class Parser
USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (https://github.com/premailer/css_parser)".freeze
- STRIP_CSS_COMMENTS_RX = %r{/\*.*?\*/}m.freeze
- STRIP_HTML_COMMENTS_RX = //m.freeze
-
- # Initial parsing
- RE_AT_IMPORT_RULE = /@import\s*(?:url\s*)?(?:\()?(?:\s*)["']?([^'"\s)]*)["']?\)?([\w\s,^\]()]*)\)?[;\n]?/.freeze
-
MAX_REDIRECTS = 3
# Array of CSS files that have been loaded.
@@ -97,6 +91,25 @@ def find_rule_sets(selectors, media_types = :all)
rule_sets
end
+    private def split_media_query_by_or_condition(media_query_selector)
+      # Takes the Crass tokens of a media query list; returns one Symbol per query.
+      # A comma is the same as `or` (matched case-insensitively, as CSS keywords are):
+      # https://developer.mozilla.org/en-US/docs/Web/CSS/@media#logical_operators
+      groups = media_query_selector.each_with_object([[]]) do |token, sum|
+        case token
+        in {node: :comma} | {node: :ident, value: /\Aor\z/i}
+          sum << [] # separator: start collecting the next query
+        else
+          sum.last << token
+        end
+      end
+
+      # Serialize each group and keep the non-blank ones as symbols.
+      groups
+        .map { |tokens| Crass::Parser.stringify(tokens).strip }
+        .reject(&:empty?).map(&:to_sym)
+    end
+
# Add a raw block of CSS.
#
# In order to follow +@import+ rules you must supply either a
@@ -122,46 +135,135 @@ def add_block!(block, options = {})
options[:media_types] = [options[:media_types]].flatten.collect { |mt| CssParser.sanitize_media_query(mt) }
options[:only_media_types] = [options[:only_media_types]].flatten.collect { |mt| CssParser.sanitize_media_query(mt) }
- block = cleanup_block(block, options)
-
+ # TODO: Would be nice to skip this step too
if options[:base_uri] and @options[:absolute_paths]
block = CssParser.convert_uris(block, options[:base_uri])
end
- # Load @imported CSS
- if @options[:import]
- block.scan(RE_AT_IMPORT_RULE).each do |import_rule|
- media_types = []
- if (media_string = import_rule[-1])
- media_string.split(',').each do |t|
- media_types << CssParser.sanitize_media_query(t) unless t.empty?
+ current_media_queries = [:all]
+ if options[:media_types]
+ current_media_queries = options[:media_types].flatten.collect { |mt| CssParser.sanitize_media_query(mt) }
+ end
+
+ Crass.parse(block).each do |node|
+ case node
+ in node: :style_rule
+ declarations = create_declaration_from_properties(node[:children])
+
+ add_rule_options = {
+ selectors: node[:selector][:value],
+ block: declarations,
+ media_types: current_media_queries
+ }
+ if options[:capture_offsets]
+ add_rule_options.merge!(
+ filename: options[:filename],
+ offset: node[:selector][:tokens].first[:pos]..node[:children].last[:pos],
+ )
+ end
+
+ add_rule!(**add_rule_options)
+ in node: :at_rule, name: "media"
+ new_media_queries = split_media_query_by_or_condition(node[:prelude])
+ add_block!(node[:block], options.merge(media_types:new_media_queries))
+
+ in node: :at_rule, name: "page"
+ declarations = create_declaration_from_properties(Crass.parse_properties(node[:block]))
+ add_rule_options = {
+ selectors: "@page#{Crass::Parser.stringify(node[:prelude])}",
+ block: declarations,
+ media_types: current_media_queries
+ }
+ if options[:capture_offsets]
+ add_rule_options.merge!(
+ filename: options[:filename],
+ offset: node[:tokens].first[:pos]..node[:tokens].last[:pos],
+ )
+ end
+ add_rule!(**add_rule_options)
+
+ in node: :at_rule, name: "font-face"
+ declarations = create_declaration_from_properties(Crass.parse_properties(node[:block]))
+ add_rule_options = {
+ selectors: "@font-face#{Crass::Parser.stringify(node[:prelude])}",
+ block: declarations,
+ media_types: current_media_queries
+ }
+ if options[:capture_offsets]
+ add_rule_options.merge!(
+ filename: options[:filename],
+ offset: node[:tokens].first[:pos]..node[:tokens].last[:pos],
+ )
+ end
+ add_rule!(**add_rule_options)
+
+ in node: :at_rule, name: "import"
+ next unless @options[:import]
+ import = nil
+ import_options = options.slice(:capture_offsets, :base_uri, :base_dir)
+
+ prelude = node[:prelude].each
+ while (token = prelude.next)
+ case token
+ in node: :whitespace # nothing
+ in node: :string
+ import = {type: :file, path: token[:value]}
+ break
+ in node: :function, name: "url"
+ import = {type: :url, path: token[:value].first[:value]}
+ break
end
- else
- media_types = [:all]
end
- next unless options[:only_media_types].include?(:all) or media_types.empty? or !(media_types & options[:only_media_types]).empty?
+ media_query_section = []
+ begin
+ while true
+ media_query_section << prelude.next
+ end
+ rescue StopIteration
+ end
- import_path = import_rule[0].to_s.gsub(/['"]*/, '').strip
+ import_options[:media_types] = split_media_query_by_or_condition(media_query_section)
+ if import_options[:media_types].empty?
+ import_options[:media_types] = [:all]
+ end
- import_options = {media_types: media_types}
- import_options[:capture_offsets] = true if options[:capture_offsets]
+ unless options[:only_media_types].include?(:all) or !(import_options[:media_types] & options[:only_media_types]).empty?
+ next
+ end
if options[:base_uri]
- import_uri = Addressable::URI.parse(options[:base_uri].to_s) + Addressable::URI.parse(import_path)
- import_options[:base_uri] = options[:base_uri]
- load_uri!(import_uri, import_options)
+ load_uri!(
+ Addressable::URI.parse(options[:base_uri].to_s) + Addressable::URI.parse(import[:path]),
+ import_options
+ )
elsif options[:base_dir]
- import_options[:base_dir] = options[:base_dir]
- load_file!(import_path, import_options)
+ load_file!(import[:path], import_options)
end
+ in node: :whitespace # nothing
+ in node: :error # nothing
end
end
+ end
- # Remove @import declarations
- block = ignore_pattern(block, RE_AT_IMPORT_RULE, options)
+    private def create_declaration_from_properties(properties)
+      # Converts parsed property nodes (hashes with a :node key) into a RuleSet::Declarations.
+      declarations = RuleSet::Declarations.new
+      properties.each do |child|
+        case child
+        in {node: :property, value: ""} # nothing, happens for { color:green; color: }
+        in {node: :property}
+          declarations.add_declaration!(
+            child[:name],
+            RuleSet::Declarations::Value.new(child[:value], important: child[:important])
+          )
+        in {node: :whitespace | :semicolon | :error} # nothing
+        else
+          # Skip anything else (e.g. a nested at-rule) rather than raising NoMatchingPatternError.
+        end
+      end
- parse_block_into_rule_sets!(block, options)
+      declarations
end
# Add a CSS rule by setting the +selectors+, +declarations+
@@ -342,140 +444,6 @@ def compact! # :nodoc:
[]
end
- def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
- current_media_queries = [:all]
- if options[:media_types]
- current_media_queries = options[:media_types].flatten.collect { |mt| CssParser.sanitize_media_query(mt) }
- end
-
- in_declarations = 0
- block_depth = 0
-
- in_charset = false # @charset is ignored for now
- in_string = false
- in_at_media_rule = false
- in_media_block = false
-
- current_selectors = String.new
- current_media_query = String.new
- current_declarations = String.new
-
- # once we are in a rule, we will use this to store where we started if we are capturing offsets
- rule_start = nil
- offset = nil
-
- block.scan(/\s+|\\{2,}|\\?[{}\s"]|[()]|.[^\s"{}()\\]*/) do |token|
- # save the regex offset so that we know where in the file we are
- offset = Regexp.last_match.offset(0) if options[:capture_offsets]
-
- if token.start_with?('"') # found un-escaped double quote
- in_string = !in_string
- end
-
- if in_declarations > 0
- # too deep, malformed declaration block
- if in_declarations > 1
- in_declarations -= 1 if token.include?('}')
- next
- end
-
- if !in_string && token.include?('{')
- in_declarations += 1
- next
- end
-
- current_declarations << token
-
- if !in_string && token.include?('}')
- current_declarations.gsub!(/\}\s*$/, '')
-
- in_declarations -= 1
- current_declarations.strip!
-
- unless current_declarations.empty?
- add_rule_options = {
- selectors: current_selectors, block: current_declarations,
- media_types: current_media_queries
- }
- if options[:capture_offsets]
- add_rule_options.merge!(filename: options[:filename], offset: rule_start..offset.last)
- end
- add_rule!(**add_rule_options)
- end
-
- current_selectors = String.new
- current_declarations = String.new
-
- # restart our search for selectors and declarations
- rule_start = nil if options[:capture_offsets]
- end
- elsif token =~ /@media/i
- # found '@media', reset current media_types
- in_at_media_rule = true
- current_media_queries = []
- elsif in_at_media_rule
- if token.include?('{')
- block_depth += 1
- in_at_media_rule = false
- in_media_block = true
- current_media_queries << CssParser.sanitize_media_query(current_media_query)
- current_media_query = String.new
- elsif token.include?(',')
- # new media query begins
- token.tr!(',', ' ')
- token.strip!
- current_media_query << token << ' '
- current_media_queries << CssParser.sanitize_media_query(current_media_query)
- current_media_query = String.new
- else
- token.strip!
- # special-case the ( and ) tokens to remove inner-whitespace
- # (eg we'd prefer '(width: 500px)' to '( width: 500px )' )
- case token
- when '('
- current_media_query << token
- when ')'
- current_media_query.sub!(/ ?$/, token)
- else
- current_media_query << token << ' '
- end
- end
- elsif in_charset or token =~ /@charset/i
- # iterate until we are out of the charset declaration
- in_charset = !token.include?(';')
- elsif !in_string && token.include?('}')
- block_depth -= 1
-
- # reset the current media query scope
- if in_media_block
- current_media_queries = [:all]
- in_media_block = false
- end
- elsif !in_string && token.include?('{')
- current_selectors.strip!
- in_declarations += 1
- else
- # if we are in a selector, add the token to the current selectors
- current_selectors << token
-
- # mark this as the beginning of the selector unless we have already marked it
- rule_start = offset.first if options[:capture_offsets] && rule_start.nil? && token =~ /^[^\s]+$/
- end
- end
-
- # check for unclosed braces
- return unless in_declarations > 0
-
- add_rule_options = {
- selectors: current_selectors, block: current_declarations,
- media_types: current_media_queries
- }
- if options[:capture_offsets]
- add_rule_options.merge!(filename: options[:filename], offset: rule_start..offset.last)
- end
- add_rule!(**add_rule_options)
- end
-
# Load a remote CSS file.
#
# You can also pass in file://test.css
@@ -565,35 +533,6 @@ def circular_reference_check(path)
end
end
- # Remove a pattern from a given string
- #
- # Returns a string.
- def ignore_pattern(css, regex, options)
- # if we are capturing file offsets, replace the characters with spaces to retail the original positions
- return css.gsub(regex) { |m| ' ' * m.length } if options[:capture_offsets]
-
- # otherwise just strip it out
- css.gsub(regex, '')
- end
-
- # Strip comments and clean up blank lines from a block of CSS.
- #
- # Returns a string.
- def cleanup_block(block, options = {}) # :nodoc:
- # Strip CSS comments
- utf8_block = block.encode('UTF-8', 'UTF-8', invalid: :replace, undef: :replace, replace: ' ')
- utf8_block = ignore_pattern(utf8_block, STRIP_CSS_COMMENTS_RX, options)
-
- # Strip HTML comments - they shouldn't really be in here but
- # some people are just crazy...
- utf8_block = ignore_pattern(utf8_block, STRIP_HTML_COMMENTS_RX, options)
-
- # Strip lines containing just whitespace
- utf8_block.gsub!(/^\s+$/, '') unless options[:capture_offsets]
-
- utf8_block
- end
-
# Download a file into a string.
#
# Returns the file's data and character set in an array.
diff --git a/lib/css_parser/regexps.rb b/lib/css_parser/regexps.rb
index cf83b2d..415b112 100644
--- a/lib/css_parser/regexps.rb
+++ b/lib/css_parser/regexps.rb
@@ -25,14 +25,7 @@ def self.regex_possible_values(*values)
URI_RX_OR_NONE = Regexp.union(URI_RX, /none/i)
RE_GRADIENT = /[-a-z]*gradient\([-a-z0-9 .,#%()]*\)/im.freeze
- # Initial parsing
- RE_AT_IMPORT_RULE = /@import\s+(url\()?["']?(.[^'"\s]*)["']?\)?([\w\s,^\])]*)\)?;?/.freeze
- #--
- # RE_AT_MEDIA_RULE = Regexp.new('(\"(.[^\n\r\f\\"]*|\\\\' + RE_NL.to_s + '|' + RE_ESCAPE.to_s + ')*\")')
-
- # RE_AT_IMPORT_RULE = Regexp.new('@import[\s]*(' + RE_STRING.to_s + ')([\w\s\,]*)[;]?', Regexp::IGNORECASE) -- should handle url() even though it is not allowed
- #++
IMPORTANT_IN_PROPERTY_RX = /\s*!important\b\s*/i.freeze
RE_INSIDE_OUTSIDE = regex_possible_values 'inside', 'outside'
@@ -46,9 +39,6 @@ def self.regex_possible_values(*values)
)
RE_IMAGE = Regexp.union(CssParser::URI_RX, CssParser::RE_GRADIENT, /none/i)
- STRIP_CSS_COMMENTS_RX = %r{/\*.*?\*/}m.freeze
- STRIP_HTML_COMMENTS_RX = //m.freeze
-
# Special units
BOX_MODEL_UNITS_RX = /(auto|inherit|0|(-*([0-9]+|[0-9]*\.[0-9]+)(rem|vw|vh|vm|vmin|vmax|e[mx]+|px|[cm]+m|p[tc+]|in|%)))([\s;]|\Z)/imx.freeze
RE_LENGTH_OR_PERCENTAGE = Regexp.new('([\-]*(([0-9]*\.[0-9]+)|[0-9]+)(e[mx]+|px|[cm]+m|p[tc+]|in|\%))', Regexp::IGNORECASE)
diff --git a/lib/css_parser/rule_set.rb b/lib/css_parser/rule_set.rb
index 7668236..a245c8d 100644
--- a/lib/css_parser/rule_set.rb
+++ b/lib/css_parser/rule_set.rb
@@ -447,6 +447,11 @@ def compute_dimensions_shorthand(values)
end
def parse_declarations!(block) # :nodoc:
+ if block.is_a? Declarations
+ self.declarations = block
+ return
+ end
+
self.declarations = Declarations.new
return unless block
diff --git a/test/test_css_parser_offset_capture.rb b/test/test_css_parser_offset_capture.rb
index a29e181..c949f59 100644
--- a/test/test_css_parser_offset_capture.rb
+++ b/test/test_css_parser_offset_capture.rb
@@ -18,30 +18,31 @@ def test_capturing_offsets_for_local_file
# check that we found the body rule where we expected
assert_equal 0, rules[0].offset.first
- assert_equal 43, rules[0].offset.last
+ assert_equal 41, rules[0].offset.last
assert_equal file_name, rules[0].filename
# and the p rule
assert_equal 45, rules[1].offset.first
- assert_equal 63, rules[1].offset.last
+ assert_equal 61, rules[1].offset.last
assert_equal file_name, rules[1].filename
end
# http://github.com/premailer/css_parser/issues#issue/4
def test_capturing_offsets_from_remote_file
# TODO: test SSL locally
+ # TODO: cache the request so this test does not need internet access (and runs much faster)
@cp.load_uri!("https://dialect.ca/inc/screen.css", capture_offsets: true)
# there are a lot of rules in this file, but check some rule offsets
rules = @cp.find_rule_sets(['#container', '#name_case_converter textarea'])
assert_equal 2, rules.count
- assert_equal 2172, rules.first.offset.first
- assert_equal 2227, rules.first.offset.last
+ assert_equal 2_172, rules.first.offset.first
+ assert_equal 2_225, rules.first.offset.last
assert_equal 'https://dialect.ca/inc/screen.css', rules.first.filename
assert_equal 10_703, rules.last.offset.first
- assert_equal 10_752, rules.last.offset.last
+ assert_equal 10_750, rules.last.offset.last
assert_equal 'https://dialect.ca/inc/screen.css', rules.last.filename
end
@@ -58,22 +59,36 @@ def test_capturing_offsets_from_string
assert_equal 4, rules.count
assert_equal 6, rules[0].offset.first
- assert_equal 27, rules[0].offset.last
+ assert_equal 25, rules[0].offset.last
assert_equal 'index.html', rules[0].filename
assert_equal 34, rules[1].offset.first
- assert_equal 53, rules[1].offset.last
+ assert_equal 51, rules[1].offset.last
assert_equal 'index.html', rules[1].filename
assert_equal 60, rules[2].offset.first
- assert_equal 102, rules[2].offset.last
+ assert_equal 100, rules[2].offset.last
assert_equal 'index.html', rules[2].filename
assert_equal 109, rules[3].offset.first
- assert_equal 133, rules[3].offset.last
+ assert_equal 131, rules[3].offset.last
assert_equal 'index.html', rules[3].filename
end
+ def test_capturing_offsets_from_string_without_closing_bracket # a rule missing its closing `}` must still be captured
+ css = <<-CSS
+ body { margin: 0px;
+ CSS
+ @cp.load_string!(css, capture_offsets: true, filename: 'index.html')
+
+ rules = @cp.find_rule_sets(['body', 'p', '#content', '.content'])
+ assert_equal 1, rules.count # only the `body` selector appears in the CSS above
+
+ assert_equal 6, rules[0].offset.first # NOTE(review): presumably the heredoc line's leading whitespace — confirm
+ assert_equal 25, rules[0].offset.last
+ assert_equal 'index.html', rules[0].filename # filename is the one passed to load_string!
+ end
+
def test_capturing_offsets_with_imports
base_dir = Pathname.new(__dir__).join('fixtures')
@cp.load_file!('import1.css', base_dir: base_dir, capture_offsets: true)
@@ -83,25 +98,25 @@ def test_capturing_offsets_with_imports
# check that we found the div rule where we expected in the primary file
assert_equal 'div', rules[0].selectors.join
assert_equal 31, rules[0].offset.first
- assert_equal 51, rules[0].offset.last
+ assert_equal 49, rules[0].offset.last
assert_equal base_dir.join('import1.css').to_s, rules[0].filename
# check that the a rule in the first import is where we expect
assert_equal 'a', rules[1].selectors.join
assert_equal 26, rules[1].offset.first
- assert_equal 54, rules[1].offset.last
+ assert_equal 52, rules[1].offset.last
assert_equal base_dir.join('subdir/import2.css').to_s, rules[1].filename
# and the body rule in the second import
assert_equal 'body', rules[2].selectors.join
assert_equal 0, rules[2].offset.first
- assert_equal 43, rules[2].offset.last
+ assert_equal 41, rules[2].offset.last
assert_equal base_dir.join('simple.css').to_s, rules[2].filename
# as well as the p rule in the second import
assert_equal 'p', rules[3].selectors.join
assert_equal 45, rules[3].offset.first
- assert_equal 63, rules[3].offset.last
+ assert_equal 61, rules[3].offset.last
assert_equal base_dir.join('simple.css').to_s, rules[3].filename
end
end