diff --git a/Gemfile b/Gemfile index 013b1700a..fd9d00b02 100644 --- a/Gemfile +++ b/Gemfile @@ -13,6 +13,7 @@ group :benchmark, :test do gem 'benchmark-ips' gem 'memory_profiler' gem 'terminal-table' + gem "lru_redux" install_if -> { RUBY_PLATFORM !~ /mingw|mswin|java/ && RUBY_ENGINE != 'truffleruby' } do gem 'stackprof' diff --git a/Rakefile b/Rakefile index 21135d1b9..6ccd2c866 100755 --- a/Rakefile +++ b/Rakefile @@ -71,7 +71,7 @@ end namespace :benchmark do desc "Run the liquid benchmark with lax parsing" - task :run do + task :lax do ruby "./performance/benchmark.rb lax" end @@ -80,11 +80,30 @@ namespace :benchmark do ruby "./performance/benchmark.rb strict" end + desc "Run the liquid benchmark with both lax and strict parsing" + task run: [:lax, :strict] + desc "Run unit benchmarks" - task :unit do - Dir["./performance/unit/*_benchmark.rb"].each do |file| - puts "🧪 Running #{file}" - ruby file + namespace :unit do + task :all do + Dir["./performance/unit/*_benchmark.rb"].each do |file| + puts "🧪 Running #{file}" + ruby file + end + end + + task :lexer do + Dir["./performance/unit/lexer_benchmark.rb"].each do |file| + puts "🧪 Running #{file}" + ruby file + end + end + + task :expression do + Dir["./performance/unit/expression_benchmark.rb"].each do |file| + puts "🧪 Running #{file}" + ruby file + end end end end diff --git a/lib/liquid.rb b/lib/liquid.rb index 0a970005d..4d0a71a64 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -21,6 +21,8 @@ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +require "strscan" + module Liquid FilterSeparator = /\|/ ArgumentSeparator = ',' @@ -44,6 +46,7 @@ module Liquid VariableParser = /\[(?>[^\[\]]+|\g<0>)*\]|#{VariableSegment}+\??/o RAISE_EXCEPTION_LAMBDA = ->(_e) { raise } + HAS_STRING_SCANNER_SCAN_BYTE = StringScanner.instance_methods.include?(:scan_byte) end require "liquid/version" @@ -68,7 +71,6 @@ module Liquid require 'liquid/errors' require 'liquid/interrupts' require 'liquid/strainer_template' -require 'liquid/expression' require 'liquid/context' require 'liquid/tag' require 'liquid/block_body' @@ -77,6 +79,7 @@ module Liquid require 'liquid/variable_lookup' require 'liquid/range_lookup' require 'liquid/resource_limits' +require 'liquid/expression' require 'liquid/template' require 'liquid/condition' require 'liquid/utils' diff --git a/lib/liquid/context.rb b/lib/liquid/context.rb index 19daaeb73..83aab805a 100644 --- a/lib/liquid/context.rb +++ b/lib/liquid/context.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require "lru_redux" + module Liquid # Context keeps the variable stack and resolves variables, as well as keywords # @@ -39,6 +41,11 @@ def initialize(environments = {}, outer_scope = {}, registers = {}, rethrow_erro @filters = [] @global_filter = nil @disabled_tags = {} + @expression_cache = LruRedux::ThreadSafeCache.new(1000) + + # Instead of constructing new StringScanner objects for each Expression parse, + # we recycle the same one. + @string_scanner = StringScanner.new("") @registers.static[:cached_partials] ||= {} @registers.static[:file_system] ||= environment.file_system @@ -176,7 +183,7 @@ def []=(key, value) # Example: # products == empty #=> products.empty? 
def [](expression) - evaluate(Expression.parse(expression)) + evaluate(Expression.parse(expression, @string_scanner, @expression_cache)) end def key?(key) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index a1426732c..f2abb2fc1 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require "lru_redux" + module Liquid class Expression LITERALS = { @@ -10,37 +12,106 @@ class Expression 'true' => true, 'false' => false, 'blank' => '', - 'empty' => '' + 'empty' => '', + # in lax mode, minus sign can be a VariableLookup + # For simplicity and performance, we treat it like a literal + '-' => VariableLookup.parse("-", nil).freeze, }.freeze - INTEGERS_REGEX = /\A(-?\d+)\z/ - FLOATS_REGEX = /\A(-?\d[\d\.]+)\z/ + DOT = ".".ord + ZERO = "0".ord + NINE = "9".ord + DASH = "-".ord # Use an atomic group (?>...) to avoid pathological backtracing from # malicious input as described in https://github.com/Shopify/liquid/issues/1357 - RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ + RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ + INTEGER_REGEX = /\A(-?\d+)\z/ + FLOAT_REGEX = /\A(-?\d+)\.\d+\z/ + + class << self + def parse(markup, ss = StringScanner.new(""), cache = nil) + return unless markup + + markup = markup.strip # markup can be a frozen string - def self.parse(markup) - return nil unless markup + if (markup.start_with?('"') && markup.end_with?('"')) || + (markup.start_with?("'") && markup.end_with?("'")) + return markup[1..-2] + elsif LITERALS.key?(markup) + return LITERALS[markup] + end + + # Cache only exists during parsing + if cache + return cache[markup] if cache.key?(markup) - markup = markup.strip - if (markup.start_with?('"') && markup.end_with?('"')) || - (markup.start_with?("'") && markup.end_with?("'")) - return markup[1..-2] + cache[markup] = inner_parse(markup, ss, cache).freeze + else + inner_parse(markup, ss, nil).freeze + end end - case markup - when INTEGERS_REGEX - Regexp.last_match(1).to_i - when RANGES_REGEX - RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2)) - when FLOATS_REGEX - Regexp.last_match(1).to_f - else - if LITERALS.key?(markup) - LITERALS[markup] + def inner_parse(markup, ss, cache) + if (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX + return RangeLookup.parse( + Regexp.last_match(1), + Regexp.last_match(2), + ss, + cache, + ) + end + + if (num = parse_number(markup, ss)) + num + else + VariableLookup.parse(markup, ss, cache) + end + end + + def parse_number(markup, ss) + # check if the markup is a simple integer or float + case markup + when INTEGER_REGEX + return Integer(markup, 10) + when FLOAT_REGEX + return markup.to_f + end + + ss.string = markup + # the first byte must be a digit, a period, or a dash + byte = ss.scan_byte + + return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE) + + # The markup could be a float with multiple dots + first_dot_pos = nil + num_end_pos = nil + + while (byte = ss.scan_byte) + return false if byte != DOT && (byte < ZERO || byte > NINE) + + # we found our number and now we are just scanning the rest of the string + next if num_end_pos + + if byte == DOT + if first_dot_pos.nil? + first_dot_pos = ss.pos + else + # we found another dot, so we know that the number ends here + num_end_pos = ss.pos - 1 + end + end + end + + num_end_pos = markup.length if ss.eos?
+ + if num_end_pos + # number ends with a number "123.123" + markup.byteslice(0, num_end_pos).to_f else - VariableLookup.parse(markup) + # number ends with a dot "123." + markup.byteslice(0, first_dot_pos).to_f end end end diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index b9e5443c1..b28e87842 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -1,66 +1,7 @@ # frozen_string_literal: true -require "strscan" - module Liquid - class Lexer1 - SPECIALS = { - '|' => :pipe, - '.' => :dot, - ':' => :colon, - ',' => :comma, - '[' => :open_square, - ']' => :close_square, - '(' => :open_round, - ')' => :close_round, - '?' => :question, - '-' => :dash, - }.freeze - IDENTIFIER = /[a-zA-Z_][\w-]*\??/ - SINGLE_STRING_LITERAL = /'[^\']*'/ - DOUBLE_STRING_LITERAL = /"[^\"]*"/ - STRING_LITERAL = Regexp.union(SINGLE_STRING_LITERAL, DOUBLE_STRING_LITERAL) - NUMBER_LITERAL = /-?\d+(\.\d+)?/ - DOTDOT = /\.\./ - COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=\s)/ - WHITESPACE_OR_NOTHING = /\s*/ - - def initialize(input) - @ss = StringScanner.new(input) - end - - def tokenize - @output = [] - - until @ss.eos? - @ss.skip(WHITESPACE_OR_NOTHING) - break if @ss.eos? - tok = if (t = @ss.scan(COMPARISON_OPERATOR)) - [:comparison, t] - elsif (t = @ss.scan(STRING_LITERAL)) - [:string, t] - elsif (t = @ss.scan(NUMBER_LITERAL)) - [:number, t] - elsif (t = @ss.scan(IDENTIFIER)) - [:id, t] - elsif (t = @ss.scan(DOTDOT)) - [:dotdot, t] - else - c = @ss.getch - if (s = SPECIALS[c]) - [s, c] - else - raise SyntaxError, "Unexpected character #{c}" - end - end - @output << tok - end - - @output << [:end_of_string] - end - end - - class Lexer2 + class Lexer CLOSE_ROUND = [:close_round, ")"].freeze CLOSE_SQUARE = [:close_square, "]"].freeze COLON = [:colon, ":"].freeze @@ -92,6 +33,7 @@ class Lexer2 SINGLE_COMPARISON_TOKENS = [].tap do |table| table["<".ord] = COMPARISON_LESS_THAN table[">".ord] = COMPARISON_GREATER_THAN + table.freeze end TWO_CHARS_COMPARISON_JUMP_TABLE = [].tap do |table| @@ -103,18 +45,17 @@ class Lexer2 sub_table["=".ord] = COMPARISION_NOT_EQUAL sub_table.freeze end + table.freeze end COMPARISON_JUMP_TABLE = [].tap do |table| table["<".ord] = [].tap do |sub_table| sub_table["=".ord] = COMPARISON_LESS_THAN_OR_EQUAL sub_table[">".ord] = COMPARISON_NOT_EQUAL_ALT - RUBY_WHITESPACE.each { |c| sub_table[c.ord] = COMPARISON_LESS_THAN } sub_table.freeze end table[">".ord] = [].tap do |sub_table| sub_table["=".ord] = COMPARISON_GREATER_THAN_OR_EQUAL - RUBY_WHITESPACE.each { |c| sub_table[c.ord] = COMPARISON_GREATER_THAN } sub_table.freeze end table.freeze @@ -157,81 +98,76 @@ class Lexer2 table.freeze end - def initialize(input) - @ss = StringScanner.new(input) - end - # rubocop:disable Metrics/BlockNesting - def tokenize - @output = [] - - until @ss.eos? - @ss.skip(WHITESPACE_OR_NOTHING) - - break if @ss.eos? - - start_pos = @ss.pos - peeked = @ss.peek_byte - - if (special = SPECIAL_TABLE[peeked]) - @ss.scan_byte - # Special case for ".." - if special == DOT && @ss.peek_byte == DOT_ORD - @ss.scan_byte - @output << DOTDOT - elsif special == DASH - # Special case for negative numbers - if (peeked_byte = @ss.peek_byte) && NUMBER_TABLE[peeked_byte] - @ss.pos -= 1 - @output << [:number, @ss.scan(NUMBER_LITERAL)] + class << self + def tokenize(ss) + output = [] + + until ss.eos? + ss.skip(WHITESPACE_OR_NOTHING) + + break if ss.eos? + + start_pos = ss.pos + peeked = ss.peek_byte + + if (special = SPECIAL_TABLE[peeked]) + ss.scan_byte + # Special case for ".." 
+ if special == DOT && ss.peek_byte == DOT_ORD + ss.scan_byte + output << DOTDOT + elsif special == DASH + # Special case for negative numbers + if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte] + ss.pos -= 1 + output << [:number, ss.scan(NUMBER_LITERAL)] + else + output << special + end else - @output << special + output << special end - else - @output << special - end - elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked]) - @ss.scan_byte - if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte]) - @output << found - @ss.scan_byte - else - raise_syntax_error(start_pos) - end - elsif (sub_table = COMPARISON_JUMP_TABLE[peeked]) - @ss.scan_byte - if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte]) - @output << found - @ss.scan_byte - else - @output << SINGLE_COMPARISON_TOKENS[peeked] - end - else - type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked] - - if type && (t = @ss.scan(pattern)) - # Special case for "contains" - @output << if type == :id && t == "contains" && @output.last&.first != :dot - COMPARISON_CONTAINS + elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked]) + ss.scan_byte + if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) + output << found + ss.scan_byte + else + raise_syntax_error(start_pos, ss) + end + elsif (sub_table = COMPARISON_JUMP_TABLE[peeked]) + ss.scan_byte + if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) + output << found + ss.scan_byte else - [type, t] + output << SINGLE_COMPARISON_TOKENS[peeked] end else - raise_syntax_error(start_pos) + type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked] + + if type && (t = ss.scan(pattern)) + # Special case for "contains" + output << if type == :id && t == "contains" && output.last&.first != :dot + COMPARISON_CONTAINS + else + [type, t] + end + else + raise_syntax_error(start_pos, ss) + end end end + # rubocop:enable Metrics/BlockNesting + output << EOS end - # rubocop:enable Metrics/BlockNesting - @output << EOS - end - - def raise_syntax_error(start_pos) - @ss.pos = start_pos - # the character could be a UTF-8 character, use getch to get all the bytes - raise SyntaxError, "Unexpected character #{@ss.getch}" + def raise_syntax_error(start_pos, ss) + ss.pos = start_pos + # the character could be a UTF-8 character, use getch to get all the bytes + raise SyntaxError, "Unexpected character #{ss.getch}" + end end end - - Lexer = StringScanner.instance_methods.include?(:scan_byte) ? Lexer2 : Lexer1 end diff --git a/lib/liquid/parse_context.rb b/lib/liquid/parse_context.rb index 7bd5418d5..60cdf9e41 100644 --- a/lib/liquid/parse_context.rb +++ b/lib/liquid/parse_context.rb @@ -12,6 +12,18 @@ def initialize(options = Const::EMPTY_HASH) @locale = @template_options[:locale] ||= I18n.new @warnings = [] + # constructing new StringScanner in Lexer, Tokenizer, etc is expensive + # This StringScanner will be shared by all of them + @string_scanner = StringScanner.new("") + + @expression_cache = if options[:expression_cache].nil? 
+ {} + elsif options[:expression_cache].respond_to?(:[]) && options[:expression_cache].respond_to?(:[]=) + options[:expression_cache] + elsif options[:expression_cache] + {} + end + self.depth = 0 self.partial = false end @@ -24,12 +36,22 @@ def new_block_body Liquid::BlockBody.new end - def new_tokenizer(markup, start_line_number: nil, for_liquid_tag: false) - Tokenizer.new(markup, line_number: start_line_number, for_liquid_tag: for_liquid_tag) + def new_parser(input) + @string_scanner.string = input + Parser.new(@string_scanner) + end + + def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false) + Tokenizer.new( + source: source, + string_scanner: @string_scanner, + line_number: start_line_number, + for_liquid_tag: for_liquid_tag, + ) end def parse_expression(markup) - Expression.parse(markup) + Expression.parse(markup, @string_scanner, @expression_cache) end def partial=(value) diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb index 8560fc58e..645dfa3a1 100644 --- a/lib/liquid/parser.rb +++ b/lib/liquid/parser.rb @@ -3,8 +3,8 @@ module Liquid class Parser def initialize(input) - l = Lexer.new(input) - @tokens = l.tokenize + ss = input.is_a?(StringScanner) ? input : StringScanner.new(input) + @tokens = Lexer.tokenize(ss) @p = 0 # pointer to current location end diff --git a/lib/liquid/range_lookup.rb b/lib/liquid/range_lookup.rb index fd208a676..bc316fe12 100644 --- a/lib/liquid/range_lookup.rb +++ b/lib/liquid/range_lookup.rb @@ -2,9 +2,9 @@ module Liquid class RangeLookup - def self.parse(start_markup, end_markup) - start_obj = Expression.parse(start_markup) - end_obj = Expression.parse(end_markup) + def self.parse(start_markup, end_markup, string_scanner, cache = nil) + start_obj = Expression.parse(start_markup, string_scanner, cache) + end_obj = Expression.parse(end_markup, string_scanner, cache) if start_obj.respond_to?(:evaluate) || end_obj.respond_to?(:evaluate) new(start_obj, end_obj) else diff --git a/lib/liquid/tags/cycle.rb b/lib/liquid/tags/cycle.rb index 06d788de3..c2d94d5f4 100644 --- a/lib/liquid/tags/cycle.rb +++ b/lib/liquid/tags/cycle.rb @@ -68,7 +68,13 @@ def render_to_output_buffer(context, output) def variables_from_string(markup) markup.split(',').collect do |var| var =~ /\s*(#{QuotedFragment})\s*/o - Regexp.last_match(1) ? parse_expression(Regexp.last_match(1)) : nil + next unless Regexp.last_match(1) + + # Expression Parser returns cached objects, and we need to dup them to + # start the cycle over for each new cycle call. + # Liquid-C does not have a cache, so we don't need to dup the object. + var = parse_expression(Regexp.last_match(1)) + var.is_a?(VariableLookup) ? 
var.dup : var end.compact end diff --git a/lib/liquid/tags/for.rb b/lib/liquid/tags/for.rb index 0dda3081d..27af15bf1 100644 --- a/lib/liquid/tags/for.rb +++ b/lib/liquid/tags/for.rb @@ -88,7 +88,7 @@ def lax_parse(markup) end def strict_parse(markup) - p = Parser.new(markup) + p = @parse_context.new_parser(markup) @variable_name = p.consume(:id) raise SyntaxError, options[:locale].t("errors.syntax.for_invalid_in") unless p.id?('in') diff --git a/lib/liquid/tags/if.rb b/lib/liquid/tags/if.rb index bcf250a11..040fecb84 100644 --- a/lib/liquid/tags/if.rb +++ b/lib/liquid/tags/if.rb @@ -102,7 +102,7 @@ def lax_parse(markup) end def strict_parse(markup) - p = Parser.new(markup) + p = @parse_context.new_parser(markup) condition = parse_binary_comparisons(p) p.consume(:end_of_string) condition diff --git a/lib/liquid/tokenizer.rb b/lib/liquid/tokenizer.rb index 740f85999..008ec070d 100644 --- a/lib/liquid/tokenizer.rb +++ b/lib/liquid/tokenizer.rb @@ -1,20 +1,43 @@ # frozen_string_literal: true +require "strscan" + module Liquid class Tokenizer attr_reader :line_number, :for_liquid_tag - def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false) - @source = source.to_s.to_str - @line_number = line_number || (line_numbers ? 1 : nil) + TAG_END = /%\}/ + TAG_OR_VARIABLE_START = /\{[\{\%]/ + NEWLINE = /\n/ + + OPEN_CURLEY = "{".ord + CLOSE_CURLEY = "}".ord + PERCENTAGE = "%".ord + + def initialize( + source:, + string_scanner:, + line_numbers: false, + line_number: nil, + for_liquid_tag: false + ) + @line_number = line_number || (line_numbers ? 1 : nil) @for_liquid_tag = for_liquid_tag - @offset = 0 - @tokens = tokenize + @source = source.to_s.to_str + @offset = 0 + @tokens = [] + + if @source + @ss = string_scanner + @ss.string = @source + tokenize + end end def shift token = @tokens[@offset] - return nil unless token + + return unless token @offset += 1 @@ -28,18 +51,105 @@ def shift private def tokenize - return [] if @source.empty? + if @for_liquid_tag + @tokens = @source.split("\n") + else + @tokens << shift_normal until @ss.eos? + end - return @source.split("\n") if @for_liquid_tag + @source = nil + @ss = nil + end - tokens = @source.split(TemplateParser) + def shift_normal + token = next_token - # removes the rogue empty element at the beginning of the array - if tokens[0]&.empty? - @offset += 1 + return unless token + + token + end + + def next_token + # possible states: :text, :tag, :variable + byte_a = @ss.peek_byte + + if byte_a == OPEN_CURLEY + @ss.scan_byte + + byte_b = @ss.peek_byte + + if byte_b == PERCENTAGE + @ss.scan_byte + return next_tag_token + elsif byte_b == OPEN_CURLEY + @ss.scan_byte + return next_variable_token + end + + @ss.pos -= 1 end - tokens + next_text_token + end + + def next_text_token + start = @ss.pos + + unless @ss.skip_until(TAG_OR_VARIABLE_START) + token = @ss.rest + @ss.terminate + return token + end + + pos = @ss.pos -= 2 + @source.byteslice(start, pos - start) + end + + def next_variable_token + start = @ss.pos - 2 + + byte_a = byte_b = @ss.scan_byte + + while byte_b + byte_a = @ss.scan_byte while byte_a && (byte_a != CLOSE_CURLEY && byte_a != OPEN_CURLEY) + + break unless byte_a + + if @ss.eos? + return byte_a == CLOSE_CURLEY ? 
@source.byteslice(start, @ss.pos - start) : "{{" + end + + byte_b = @ss.scan_byte + + if byte_a == CLOSE_CURLEY + if byte_b == CLOSE_CURLEY + return @source.byteslice(start, @ss.pos - start) + elsif byte_b != CLOSE_CURLEY + @ss.pos -= 1 + return @source.byteslice(start, @ss.pos - start) + end + elsif byte_a == OPEN_CURLEY && byte_b == PERCENTAGE + return next_tag_token_with_start(start) + end + + byte_a = byte_b + end + + "{{" + end + + def next_tag_token + start = @ss.pos - 2 + if (len = @ss.skip_until(TAG_END)) + @source.byteslice(start, len + 2) + else + "{%" + end + end + + def next_tag_token_with_start(start) + @ss.skip_until(TAG_END) + @source.byteslice(start, @ss.pos - start) end end end diff --git a/lib/liquid/variable.rb b/lib/liquid/variable.rb index 2bcf871ea..090690559 100644 --- a/lib/liquid/variable.rb +++ b/lib/liquid/variable.rb @@ -61,7 +61,7 @@ def lax_parse(markup) def strict_parse(markup) @filters = [] - p = Parser.new(markup) + p = @parse_context.new_parser(markup) return if p.look(:end_of_string) diff --git a/lib/liquid/variable_lookup.rb b/lib/liquid/variable_lookup.rb index 1ade30ef9..06c24b764 100644 --- a/lib/liquid/variable_lookup.rb +++ b/lib/liquid/variable_lookup.rb @@ -6,16 +6,20 @@ class VariableLookup attr_reader :name, :lookups - def self.parse(markup) - new(markup) + def self.parse(markup, string_scanner, cache = nil) + new(markup, string_scanner, cache) end - def initialize(markup) + def initialize(markup, string_scanner = StringScanner.new(""), cache = nil) lookups = markup.scan(VariableParser) name = lookups.shift if name&.start_with?('[') && name&.end_with?(']') - name = Expression.parse(name[1..-2]) + name = Expression.parse( + name[1..-2], + string_scanner, + cache, + ) end @name = name @@ -25,7 +29,11 @@ def initialize(markup) @lookups.each_index do |i| lookup = lookups[i] if lookup&.start_with?('[') && lookup&.end_with?(']') - lookups[i] = Expression.parse(lookup[1..-2]) + lookups[i] = Expression.parse( + lookup[1..-2], + string_scanner, + cache, + ) elsif COMMAND_METHODS.include?(lookup) @command_flags |= 1 << i end diff --git a/lib/liquid/version.rb b/lib/liquid/version.rb index 52831e7b2..7c3004e50 100644 --- a/lib/liquid/version.rb +++ b/lib/liquid/version.rb @@ -2,5 +2,5 @@ # frozen_string_literal: true module Liquid - VERSION = "5.6.0" + VERSION = "5.6.1" end diff --git a/liquid.gemspec b/liquid.gemspec index 9f7e1923a..1b9a91004 100644 --- a/liquid.gemspec +++ b/liquid.gemspec @@ -28,7 +28,7 @@ Gem::Specification.new do |s| s.require_path = "lib" - s.add_dependency("strscan") + s.add_dependency("strscan", ">= 3.1.1") s.add_dependency("bigdecimal") s.add_development_dependency('rake', '~> 13.0') diff --git a/performance/benchmark.rb b/performance/benchmark.rb index f897c5113..b61e9057c 100644 --- a/performance/benchmark.rb +++ b/performance/benchmark.rb @@ -9,14 +9,17 @@ profiler = ThemeRunner.new Benchmark.ips do |x| - x.time = 10 - x.warmup = 5 + x.time = 20 + x.warmup = 10 puts puts "Running benchmark for #{x.time} seconds (with #{x.warmup} seconds warmup)." 
puts - x.report("parse:") { profiler.compile } - x.report("render:") { profiler.render } - x.report("parse & render:") { profiler.run } + phase = ENV["PHASE"] || "all" + + x.report("tokenize:") { profiler.tokenize } if phase == "all" || phase == "tokenize" + x.report("parse:") { profiler.compile } if phase == "all" || phase == "parse" + x.report("render:") { profiler.render } if phase == "all" || phase == "render" + x.report("parse & render:") { profiler.run } if phase == "all" || phase == "run" end diff --git a/performance/theme_runner.rb b/performance/theme_runner.rb index 3dd5548e3..469503670 100644 --- a/performance/theme_runner.rb +++ b/performance/theme_runner.rb @@ -48,6 +48,19 @@ def compile end end + # `tokenize` will just test the tokenizing portion of liquid without any templates + def tokenize + ss = StringScanner.new("") + @tests.each do |test_hash| + tokenizer = Liquid::Tokenizer.new( + source: test_hash[:liquid], + string_scanner: ss, + line_numbers: true, + ) + while tokenizer.shift; end + end + end + # `run` is called to benchmark rendering and compiling at the same time def run each_test do |liquid, layout, assigns, page_template, template_name| diff --git a/performance/unit/expression_benchmark.rb b/performance/unit/expression_benchmark.rb new file mode 100644 index 000000000..8cd552595 --- /dev/null +++ b/performance/unit/expression_benchmark.rb @@ -0,0 +1,94 @@ +# frozen_string_literal: true + +require "benchmark/ips" + +# benchmark liquid expression parsing + +require 'liquid' + +RubyVM::YJIT.enable + +STRING_MARKUPS = [ + "\"foo\"", + "\"fooooooooooo\"", + "\"foooooooooooooooooooooooooooooo\"", + "'foo'", + "'fooooooooooo'", + "'foooooooooooooooooooooooooooooo'", +] + +VARIABLE_MARKUPS = [ + "article", + "article.title", + "article.title.size", + "very_long_variable_name_2024_11_05", + "very_long_variable_name_2024_11_05.size", +] + +NUMBER_MARKUPS = [ + "0", + "35", + "1241891024912849", + "3.5", + "3.51214128409128", + "12381902839.123819283910283", + "123.456.789", + "-123", + "-12.33", + "-405.231", + "-0", + "0", + "0.0", + "0.0000000000000000000000", + "0.00000000001", +] + +RANGE_MARKUPS = [ + "(1..30)", + "(1...30)", + "(1..30..5)", + "(1.0...30.0)", + "(1.........30)", + "(1..foo)", + "(foo..30)", + "(foo..bar)", + "(foo...bar...100)", + "(foo...bar...100.0)", +] + +LITERAL_MARKUPS = [ + nil, + 'nil', + 'null', + '', + 'true', + 'false', + 'blank', + 'empty', +] + +MARKUPS = { + "string" => STRING_MARKUPS, + "literal" => LITERAL_MARKUPS, + "variable" => VARIABLE_MARKUPS, + "number" => NUMBER_MARKUPS, + "range" => RANGE_MARKUPS, +} + +Benchmark.ips do |x| + x.config(time: 5, warmup: 5) + + MARKUPS.each do |type, markups| + x.report("Liquid::Expression#parse: #{type}") do + markups.each do |markup| + Liquid::Expression.parse(markup) + end + end + end + + x.report("Liquid::Expression#parse: all") do + MARKUPS.values.flatten.each do |markup| + Liquid::Expression.parse(markup) + end + end +end diff --git a/performance/unit/lexer_benchmark.rb b/performance/unit/lexer_benchmark.rb index 2faa1f686..fda6ce1f6 100644 --- a/performance/unit/lexer_benchmark.rb +++ b/performance/unit/lexer_benchmark.rb @@ -29,31 +29,12 @@ "foo | default: -1", ] -EXPRESSIONS.each do |expr| - lexer_1_result = Liquid::Lexer1.new(expr).tokenize - lexer_2_result = Liquid::Lexer2.new(expr).tokenize - - next if lexer_1_result == lexer_2_result - - warn "Lexer1 and Lexer2 results are different for expression: #{expr}" - warn "expected: #{lexer_1_result}" - warn "got: #{lexer_2_result}" - abort -end -
Benchmark.ips do |x| x.config(time: 10, warmup: 5) - x.report("Liquid::Lexer1#tokenize") do - EXPRESSIONS.each do |expr| - l = Liquid::Lexer1.new(expr) - l.tokenize - end - end - - x.report("Liquid::Lexer2#tokenize") do + x.report("Liquid::Lexer#tokenize") do EXPRESSIONS.each do |expr| - l = Liquid::Lexer2.new(expr) - l.tokenize + ss = StringScanner.new(expr) + Liquid::Lexer.tokenize(ss) end end diff --git a/test/integration/expression_test.rb b/test/integration/expression_test.rb index 80f6f4173..0eef5dc9d 100644 --- a/test/integration/expression_test.rb +++ b/test/integration/expression_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'test_helper' +require 'lru_redux' class ExpressionTest < Minitest::Test def test_keyword_literals @@ -13,6 +14,7 @@ def test_string assert_template_result("double quoted", '{{"double quoted"}}') assert_template_result("spaced", "{{ 'spaced' }}") assert_template_result("spaced2", "{{ 'spaced2' }}") + assert_template_result("emoji🔥", "{{ 'emoji🔥' }}") end def test_int @@ -22,6 +24,7 @@ def test_int end def test_float + assert_template_result("-17.42", "{{ -17.42 }}") assert_template_result("2.5", "{{ 2.5 }}") assert_expression_result(1.5, "1.5") end @@ -40,6 +43,101 @@ def test_range ) end + def test_quirky_negative_sign_expression_markup + result = Expression.parse("-", nil) + assert(result.is_a?(VariableLookup)) + assert_equal("-", result.name) + + # for this template, the expression markup is "-" + assert_template_result( + "", + "{{ - 'theme.css' - }}", + ) + end + + def test_expression_cache + skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + + cache = {} + template = <<~LIQUID + {% assign x = 1 %} + {{ x }} + {% assign x = 2 %} + {{ x }} + {% assign y = 1 %} + {{ y }} + LIQUID + + Liquid::Template.parse(template, expression_cache: cache).render + + assert_equal( + ["1", "2", "x", "y"], + cache.to_a.map { _1[0] }.sort, + ) + end + + def test_expression_cache_with_true_boolean + skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + + template = <<~LIQUID + {% assign x = 1 %} + {{ x }} + {% assign x = 2 %} + {{ x }} + {% assign y = 1 %} + {{ y }} + LIQUID + + parse_context = ParseContext.new(expression_cache: true) + + Liquid::Template.parse(template, parse_context).render + + cache = parse_context.instance_variable_get(:@expression_cache) + + assert_equal( + ["1", "2", "x", "y"], + cache.to_a.map { _1[0] }.sort, + ) + end + + def test_expression_cache_with_lru_redux + skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + + cache = LruRedux::Cache.new(10) + template = <<~LIQUID + {% assign x = 1 %} + {{ x }} + {% assign x = 2 %} + {{ x }} + {% assign y = 1 %} + {{ y }} + LIQUID + + Liquid::Template.parse(template, expression_cache: cache).render + + assert_equal( + ["1", "2", "x", "y"], + cache.to_a.map { _1[0] }.sort, + ) + end + + def test_disable_expression_cache + skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + + template = <<~LIQUID + {% assign x = 1 %} + {{ x }} + {% assign x = 2 %} + {{ x }} + {% assign y = 1 %} + {{ y }} + LIQUID + + parse_context = Liquid::ParseContext.new(expression_cache: false) + Liquid::Template.parse(template, parse_context).render + assert(parse_context.instance_variable_get(:@expression_cache).nil?)
+ end + private def assert_expression_result(expect, markup, **assigns) diff --git a/test/integration/parsing_quirks_test.rb b/test/integration/parsing_quirks_test.rb index 48f914635..f5b483fcc 100644 --- a/test/integration/parsing_quirks_test.rb +++ b/test/integration/parsing_quirks_test.rb @@ -134,6 +134,14 @@ def test_contains_in_id def test_incomplete_expression with_error_mode(:lax) do + assert_template_result("false", "{{ false - }}") + assert_template_result("false", "{{ false > }}") + assert_template_result("false", "{{ false < }}") + assert_template_result("false", "{{ false = }}") + assert_template_result("false", "{{ false ! }}") + assert_template_result("false", "{{ false 1 }}") + assert_template_result("false", "{{ false a }}") + assert_template_result("false", "{% liquid assign foo = false -\n%}{{ foo }}") assert_template_result("false", "{% liquid assign foo = false >\n%}{{ foo }}") assert_template_result("false", "{% liquid assign foo = false <\n%}{{ foo }}") diff --git a/test/integration/tags/cycle_tag_test.rb b/test/integration/tags/cycle_tag_test.rb new file mode 100644 index 000000000..a034db17e --- /dev/null +++ b/test/integration/tags/cycle_tag_test.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require 'test_helper' + +class CycleTagTest < Minitest::Test + def test_simple_cycle + template = <<~LIQUID + {%- cycle '1', '2', '3' -%} + {%- cycle '1', '2', '3' -%} + {%- cycle '1', '2', '3' -%} + LIQUID + + assert_template_result("123", template) + end + + def test_simple_cycle_inside_for_loop + template = <<~LIQUID + {%- for i in (1..3) -%} + {% cycle '1', '2', '3' %} + {%- endfor -%} + LIQUID + + assert_template_result("123", template) + end + + def test_cycle_with_variables_inside_for_loop + template = <<~LIQUID + {%- assign a = 1 -%} + {%- assign b = 2 -%} + {%- assign c = 3 -%} + {%- for i in (1..3) -%} + {% cycle a, b, c %} + {%- endfor -%} + LIQUID + + assert_template_result("123", template) + end + + def test_cycle_tag_always_resets_cycle + template = <<~LIQUID + {%- assign a = "1" -%} + {%- cycle a, "2" -%} + {%- cycle a, "2" -%} + LIQUID + + assert_template_result("11", template) + end +end diff --git a/test/integration/tags/raw_tag_test.rb b/test/integration/tags/raw_tag_test.rb index 3629f39c2..2ca43d3b2 100644 --- a/test/integration/tags/raw_tag_test.rb +++ b/test/integration/tags/raw_tag_test.rb @@ -16,6 +16,7 @@ def test_output_in_raw assert_template_result('>{{ test }}<', '> {%- raw -%}{{ test }}{%- endraw -%} <') assert_template_result("> inner <", "> {%- raw -%} inner {%- endraw %} <") assert_template_result("> inner <", "> {%- raw -%} inner {%- endraw -%} <") + assert_template_result("{Hello}", "{% raw %}{{% endraw %}Hello{% raw %}}{% endraw %}") end def test_open_tag_in_raw diff --git a/test/unit/environment_filter_test.rb b/test/unit/environment_filter_test.rb index 942f6a3c7..e710573b1 100644 --- a/test/unit/environment_filter_test.rb +++ b/test/unit/environment_filter_test.rb @@ -36,14 +36,14 @@ def test_strainer assert_equal("public", strainer.invoke("public_filter")) end - def test_stainer_raises_argument_error + def test_strainer_raises_argument_error strainer = @environment.create_strainer(@context) assert_raises(Liquid::ArgumentError) do strainer.invoke("public_filter", 1) end end - def test_stainer_argument_error_contains_backtrace + def test_strainer_argument_error_contains_backtrace strainer = @environment.create_strainer(@context) exception = assert_raises(Liquid::ArgumentError) do @@ -54,8 +54,9 @@ def 
test_stainer_argument_error_contains_backtrace /\ALiquid error: wrong number of arguments \((1 for 0|given 1, expected 0)\)\z/, exception.message, ) + source = AccessScopeFilters.instance_method(:public_filter).source_location - assert_equal(source.map(&:to_s), exception.backtrace[0].split(':')[0..1]) + assert_equal(source[0..1].map(&:to_s), exception.backtrace[0].split(':')[0..1]) end def test_strainer_only_invokes_public_filter_methods diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb index 26a25b629..18e1e439a 100644 --- a/test/unit/lexer_unit_test.rb +++ b/test/unit/lexer_unit_test.rb @@ -134,6 +134,6 @@ def test_tokenize_incomplete_expression private def tokenize(input) - Lexer.new(input).tokenize + Lexer.tokenize(StringScanner.new(input)) end end diff --git a/test/unit/parser_unit_test.rb b/test/unit/parser_unit_test.rb index 130b8b793..2c6a3594e 100644 --- a/test/unit/parser_unit_test.rb +++ b/test/unit/parser_unit_test.rb @@ -6,20 +6,20 @@ class ParserUnitTest < Minitest::Test include Liquid def test_consume - p = Parser.new("wat: 7") + p = new_parser("wat: 7") assert_equal('wat', p.consume(:id)) assert_equal(':', p.consume(:colon)) assert_equal('7', p.consume(:number)) end def test_jump - p = Parser.new("wat: 7") + p = new_parser("wat: 7") p.jump(2) assert_equal('7', p.consume(:number)) end def test_consume? - p = Parser.new("wat: 7") + p = new_parser("wat: 7") assert_equal('wat', p.consume?(:id)) assert_equal(false, p.consume?(:dot)) assert_equal(':', p.consume(:colon)) @@ -27,7 +27,7 @@ def test_consume? end def test_id? - p = Parser.new("wat 6 Peter Hegemon") + p = new_parser("wat 6 Peter Hegemon") assert_equal('wat', p.id?('wat')) assert_equal(false, p.id?('endgame')) assert_equal('6', p.consume(:number)) @@ -36,7 +36,7 @@ def test_id? end def test_look - p = Parser.new("wat 6 Peter Hegemon") + p = new_parser("wat 6 Peter Hegemon") assert_equal(true, p.look(:id)) assert_equal('wat', p.consume(:id)) assert_equal(false, p.look(:comparison)) @@ -46,12 +46,12 @@ def test_look end def test_expressions - p = Parser.new("hi.there hi?[5].there? hi.there.bob") + p = new_parser("hi.there hi?[5].there? 
hi.there.bob") assert_equal('hi.there', p.expression) assert_equal('hi?[5].there?', p.expression) assert_equal('hi.there.bob', p.expression) - p = Parser.new("567 6.0 'lol' \"wut\"") + p = new_parser("567 6.0 'lol' \"wut\"") assert_equal('567', p.expression) assert_equal('6.0', p.expression) assert_equal("'lol'", p.expression) @@ -59,7 +59,7 @@ def test_expressions end def test_ranges - p = Parser.new("(5..7) (1.5..9.6) (young..old) (hi[5].wat..old)") + p = new_parser("(5..7) (1.5..9.6) (young..old) (hi[5].wat..old)") assert_equal('(5..7)', p.expression) assert_equal('(1.5..9.6)', p.expression) assert_equal('(young..old)', p.expression) @@ -67,7 +67,7 @@ def test_ranges end def test_arguments - p = Parser.new("filter: hi.there[5], keyarg: 7") + p = new_parser("filter: hi.there[5], keyarg: 7") assert_equal('filter', p.consume(:id)) assert_equal(':', p.consume(:colon)) assert_equal('hi.there[5]', p.argument) @@ -77,8 +77,14 @@ def test_arguments def test_invalid_expression assert_raises(SyntaxError) do - p = Parser.new("==") + p = new_parser("==") p.expression end end + + private + + def new_parser(str) + Parser.new(StringScanner.new(str)) + end end diff --git a/test/unit/tag_unit_test.rb b/test/unit/tag_unit_test.rb index 7f9b490c9..9b4bf88b6 100644 --- a/test/unit/tag_unit_test.rb +++ b/test/unit/tag_unit_test.rb @@ -6,18 +6,18 @@ class TagUnitTest < Minitest::Test include Liquid def test_tag - tag = Tag.parse('tag', "", Tokenizer.new(""), ParseContext.new) + tag = Tag.parse('tag', "", new_tokenizer, ParseContext.new) assert_equal('liquid::tag', tag.name) assert_equal('', tag.render(Context.new)) end def test_return_raw_text_of_tag - tag = Tag.parse("long_tag", "param1, param2, param3", Tokenizer.new(""), ParseContext.new) + tag = Tag.parse("long_tag", "param1, param2, param3", new_tokenizer, ParseContext.new) assert_equal("long_tag param1, param2, param3", tag.raw) end def test_tag_name_should_return_name_of_the_tag - tag = Tag.parse("some_tag", "", Tokenizer.new(""), ParseContext.new) + tag = Tag.parse("some_tag", "", new_tokenizer, ParseContext.new) assert_equal('some_tag', tag.tag_name) end @@ -26,7 +26,16 @@ def render(_context); end end def test_tag_render_to_output_buffer_nil_value - custom_tag = CustomTag.parse("some_tag", "", Tokenizer.new(""), ParseContext.new) + custom_tag = CustomTag.parse("some_tag", "", new_tokenizer, ParseContext.new) assert_equal('some string', custom_tag.render_to_output_buffer(Context.new, "some string")) end + + private + + def new_tokenizer + Tokenizer.new( + source: "", + string_scanner: StringScanner.new(""), + ) + end end diff --git a/test/unit/tokenizer_unit_test.rb b/test/unit/tokenizer_unit_test.rb index 5e0496e68..76d379d36 100644 --- a/test/unit/tokenizer_unit_test.rb +++ b/test/unit/tokenizer_unit_test.rb @@ -6,6 +6,7 @@ class TokenizerTest < Minitest::Test def test_tokenize_strings assert_equal([' '], tokenize(' ')) assert_equal(['hello world'], tokenize('hello world')) + assert_equal(['{}'], tokenize('{}')) end def test_tokenize_variables @@ -34,6 +35,19 @@ def test_tokenize_with_nil_source_returns_empty_array assert_equal([], tokenize(nil)) end + def test_incomplete_curly_braces + assert_equal(["{{.}", " "], tokenize('{{.} ')) + assert_equal(["{{}", "%}"], tokenize('{{}%}')) + assert_equal(["{{}}", "}"], tokenize('{{}}}')) + end + + def test_unmatching_start_and_end + assert_equal(["{{%}"], tokenize('{{%}')) + assert_equal(["{{%%%}}"], tokenize('{{%%%}}')) + assert_equal(["{%", "}}"], tokenize('{%}}')) + assert_equal(["{%%}", "}"], 
tokenize('{%%}}')) + end + private def new_tokenizer(source, parse_context: Liquid::ParseContext.new, start_line_number: nil)
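# --- Usage sketch, not part of the patch above ------------------------------
# The integration tests earlier in this diff exercise the new
# `expression_cache` option end to end. Based on those tests, any object
# responding to #[], #[]=, and #key? can back the cache (a plain Hash, or the
# LruRedux cache added to the Gemfile), and `expression_cache: false` disables
# caching. The template and assigns below are made up for illustration.

require "liquid"
require "lru_redux"

cache = LruRedux::ThreadSafeCache.new(1_000)

template = Liquid::Template.parse(
  "{{ product.title | upcase }}",
  expression_cache: cache,
)

puts template.render("product" => { "title" => "shirt" }) # => SHIRT
puts cache.to_a.map(&:first).inspect                      # cached expression markups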