From 8ecb703d4d5adb1a2b0c8b3d2510e21d018ee8d3 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Wed, 13 Nov 2024 16:09:42 -0400 Subject: [PATCH 01/33] use StringScanner to improve Expression Parsing and Tokenizer --- Gemfile | 3 + Rakefile | 29 +++- lib/liquid/expression.rb | 106 ++++++++++++- lib/liquid/lexer.rb | 14 +- lib/liquid/tokenizer.rb | 154 ++++++++++++++++++- performance/benchmark.rb | 13 +- performance/theme_runner.rb | 8 + performance/unit/expression_benchmark.rb | 183 +++++++++++++++++++++++ test/integration/expression_test.rb | 2 + test/integration/parsing_quirks_test.rb | 8 + test/integration/tags/raw_tag_test.rb | 1 + test/unit/tokenizer_unit_test.rb | 14 ++ 12 files changed, 520 insertions(+), 15 deletions(-) create mode 100644 performance/unit/expression_benchmark.rb diff --git a/Gemfile b/Gemfile index 013b1700a..2053d5e0a 100644 --- a/Gemfile +++ b/Gemfile @@ -24,3 +24,6 @@ group :test do gem 'rubocop-shopify', '~> 2.12.0', require: false gem 'rubocop-performance', require: false end + +gem "strscan", ">= 3.1" +gem "lru_redux" diff --git a/Rakefile b/Rakefile index 21135d1b9..6ccd2c866 100755 --- a/Rakefile +++ b/Rakefile @@ -71,7 +71,7 @@ end namespace :benchmark do desc "Run the liquid benchmark with lax parsing" - task :run do + task :lax do ruby "./performance/benchmark.rb lax" end @@ -80,11 +80,30 @@ namespace :benchmark do ruby "./performance/benchmark.rb strict" end + desc "Run the liquid benchmark with both lax and strict parsing" + task run: [:lax, :strict] + desc "Run unit benchmarks" - task :unit do - Dir["./performance/unit/*_benchmark.rb"].each do |file| - puts "🧪 Running #{file}" - ruby file + namespace :unit do + task :all do + Dir["./performance/unit/*_benchmark.rb"].each do |file| + puts "🧪 Running #{file}" + ruby file + end + end + + task :lexer do + Dir["./performance/unit/lexer_benchmark.rb"].each do |file| + puts "🧪 Running #{file}" + ruby file + end + end + + task :expression do + Dir["./performance/unit/expression_benchmark.rb"].each do |file| + puts "🧪 Running #{file}" + ruby file + end end end end diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index a1426732c..11a695ad9 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -1,7 +1,9 @@ # frozen_string_literal: true +require "lru_redux" + module Liquid - class Expression + class Expression1 LITERALS = { nil => nil, 'nil' => nil, @@ -45,4 +47,106 @@ def self.parse(markup) end end end + + class Expression2 + LITERALS = { + nil => nil, + 'nil' => nil, + 'null' => nil, + '' => nil, + 'true' => true, + 'false' => false, + 'blank' => '', + 'empty' => '' + }.freeze + + DOT = ".".ord + ZERO = "0".ord + NINE = "9".ord + DASH = "-".ord + + # Use an atomic group (?>...) to avoid pathological backtracing from + # malicious input as described in https://github.com/Shopify/liquid/issues/1357 + RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ + CACHE = LruRedux::Cache.new(1_000_000) # most themes would have less than 2,000 unique expression + + class << self + def string_scanner + @ss ||= StringScanner.new("") + end + + def parse(markup) + return unless markup + + markup = markup.strip # markup can be a frozen string + + return CACHE[markup] if CACHE.key?(markup) + + CACHE[markup] = inner_parse(markup) + end + + def inner_parse(markup) + if (markup.start_with?('"') && markup.end_with?('"')) || + (markup.start_with?("'") && markup.end_with?("'")) + return markup[1..-2] + elsif (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX + return RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2)) + end + + return LITERALS[markup] if LITERALS.key?(markup) + + if (num = parse_number(markup)) + num + else + VariableLookup.parse(markup) + end + end + + def parse_number(markup) + ss = string_scanner + ss.string = markup + + is_integer = true + last_dot_pos = nil + num_end_pos = nil + + # the first byte must be a digit, a period, or a dash + byte = ss.scan_byte + + return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE) + + while (byte = ss.scan_byte) + return false if byte != DOT && (byte < ZERO || byte > NINE) + + # we found our number and now we are just scanning the rest of the string + next if num_end_pos + + if byte == DOT + if is_integer == false + num_end_pos = ss.pos - 1 + else + is_integer = false + last_dot_pos = ss.pos + end + end + end + + num_end_pos = markup.length if ss.eos? + + return markup.to_i if is_integer + + if num_end_pos + # number ends with a number "123.123" + markup.byteslice(0, num_end_pos).to_f + elsif last_dot_pos + markup.byteslice(0, last_dot_pos).to_f + else + # we should never reach this point + false + end + end + end + end + + Expression = StringScanner.instance_methods.include?(:scan_byte) ? Expression2 : Expression1 end diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index b9e5443c1..c2adec78f 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -92,6 +92,7 @@ class Lexer2 SINGLE_COMPARISON_TOKENS = [].tap do |table| table["<".ord] = COMPARISON_LESS_THAN table[">".ord] = COMPARISON_GREATER_THAN + table.freeze end TWO_CHARS_COMPARISON_JUMP_TABLE = [].tap do |table| @@ -103,18 +104,17 @@ class Lexer2 sub_table["=".ord] = COMPARISION_NOT_EQUAL sub_table.freeze end + table.freeze end COMPARISON_JUMP_TABLE = [].tap do |table| table["<".ord] = [].tap do |sub_table| sub_table["=".ord] = COMPARISON_LESS_THAN_OR_EQUAL sub_table[">".ord] = COMPARISON_NOT_EQUAL_ALT - RUBY_WHITESPACE.each { |c| sub_table[c.ord] = COMPARISON_LESS_THAN } sub_table.freeze end table[">".ord] = [].tap do |sub_table| sub_table["=".ord] = COMPARISON_GREATER_THAN_OR_EQUAL - RUBY_WHITESPACE.each { |c| sub_table[c.ord] = COMPARISON_GREATER_THAN } sub_table.freeze end table.freeze @@ -157,8 +157,15 @@ class Lexer2 table.freeze end + class << self + def string_scanner + @string_scanner ||= StringScanner.new("") + end + end + def initialize(input) - @ss = StringScanner.new(input) + @ss = Lexer2.string_scanner + @ss.string = input end # rubocop:disable Metrics/BlockNesting @@ -233,5 +240,6 @@ def raise_syntax_error(start_pos) end end + # Remove this once we can depend on strscan >= 3.1.1 Lexer = StringScanner.instance_methods.include?(:scan_byte) ? Lexer2 : Lexer1 end diff --git a/lib/liquid/tokenizer.rb b/lib/liquid/tokenizer.rb index 740f85999..0bb05c73d 100644 --- a/lib/liquid/tokenizer.rb +++ b/lib/liquid/tokenizer.rb @@ -1,7 +1,9 @@ # frozen_string_literal: true +require "strscan" + module Liquid - class Tokenizer + class Tokenizer1 attr_reader :line_number, :for_liquid_tag def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false) @@ -42,4 +44,154 @@ def tokenize tokens end end + + class Tokenizer2 + attr_reader :line_number, :for_liquid_tag + + TAG_END = /%\}/ + TAG_OR_VARIABLE_START = /\{[\{\%]/ + NEWLINE = /\n/ + + OPEN_CURLEY = "{".ord + CLOSE_CURLEY = "}".ord + PERCENTAGE = "%".ord + + class << self + def string_scanner + @string_scanner ||= StringScanner.new("") + end + end + + def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false) + @line_number = line_number || (line_numbers ? 1 : nil) + @for_liquid_tag = for_liquid_tag + @source = source + @offset = 0 + @tokens = [] + tokenize + end + + def shift + token = @tokens[@offset] + + return unless token + + @offset += 1 + + if @line_number + @line_number += @for_liquid_tag ? 1 : token.count("\n") + end + + token + end + + private + + def tokenize + if @for_liquid_tag + @tokens = @source.split("\n") + else + @ss = Tokenizer2.string_scanner + @ss.string = @source + @tokens << shift_normal until @ss.eos? + end + + @ss = nil + @source = nil + end + + def shift_normal + token = next_token + + return unless token + + token + end + + def next_token + # possible states: :text, :tag, :variable + byte_a = @ss.peek_byte + + if byte_a == OPEN_CURLEY + @ss.scan_byte + + byte_b = @ss.peek_byte + + if byte_b == PERCENTAGE + @ss.scan_byte + return next_tag_token + elsif byte_b == OPEN_CURLEY + @ss.scan_byte + return next_variable_token + end + + @ss.pos -= 1 + end + + next_text_token + end + + def next_text_token + start = @ss.pos + + unless @ss.skip_until(TAG_OR_VARIABLE_START) + token = @ss.rest + @ss.terminate + return token + end + + pos = @ss.pos -= 2 + @source.byteslice(start, pos - start) + end + + def next_variable_token + start = @ss.pos - 2 + + byte_a = byte_b = @ss.scan_byte + + while byte_b + byte_a = @ss.scan_byte while byte_a && (byte_a != CLOSE_CURLEY && byte_a != OPEN_CURLEY) + + break unless byte_a + + if @ss.eos? + return byte_a == CLOSE_CURLEY ? @source.byteslice(start, @ss.pos - start) : "{{" + end + + byte_b = @ss.scan_byte + + if byte_a == CLOSE_CURLEY + if byte_b == CLOSE_CURLEY + return @source.byteslice(start, @ss.pos - start) + elsif byte_b != CLOSE_CURLEY + @ss.pos -= 1 + return @source.byteslice(start, @ss.pos - start) + end + elsif byte_a == OPEN_CURLEY && byte_b == PERCENTAGE + return next_tag_token_with_start(start) + end + + byte_a = byte_b + end + + "{{" + end + + def next_tag_token + start = @ss.pos - 2 + if (len = @ss.skip_until(TAG_END)) + @source.byteslice(start, len + 2) + else + "{%" + end + end + + def next_tag_token_with_start(start) + @ss.skip_until(TAG_END) + @source.byteslice(start, @ss.pos - start) + end + end + + # Remove this once we can depend on strscan >= 3.1.1 + Tokenizer = StringScanner.instance_methods.include?(:scan_byte) ? Tokenizer2 : Tokenizer1 end diff --git a/performance/benchmark.rb b/performance/benchmark.rb index f897c5113..b61e9057c 100644 --- a/performance/benchmark.rb +++ b/performance/benchmark.rb @@ -9,14 +9,17 @@ profiler = ThemeRunner.new Benchmark.ips do |x| - x.time = 10 - x.warmup = 5 + x.time = 20 + x.warmup = 10 puts puts "Running benchmark for #{x.time} seconds (with #{x.warmup} seconds warmup)." puts - x.report("parse:") { profiler.compile } - x.report("render:") { profiler.render } - x.report("parse & render:") { profiler.run } + phase = ENV["PHASE"] || "all" + + x.report("tokenize:") { profiler.tokenize } if phase == "all" || phase == "tokenize" + x.report("parse:") { profiler.compile } if phase == "all" || phase == "parse" + x.report("render:") { profiler.render } if phase == "all" || phase == "render" + x.report("parse & render:") { profiler.run } if phase == "all" || phase == "run" end diff --git a/performance/theme_runner.rb b/performance/theme_runner.rb index 3dd5548e3..c158f8778 100644 --- a/performance/theme_runner.rb +++ b/performance/theme_runner.rb @@ -48,6 +48,14 @@ def compile end end + # `tokenize` will just test the tokenizen portion of liquid without any templates + def tokenize + @tests.each do |test_hash| + tokenizer = Liquid::Tokenizer.new(test_hash[:liquid], true) + while tokenizer.shift; end + end + end + # `run` is called to benchmark rendering and compiling at the same time def run each_test do |liquid, layout, assigns, page_template, template_name| diff --git a/performance/unit/expression_benchmark.rb b/performance/unit/expression_benchmark.rb new file mode 100644 index 000000000..ae4eeaa59 --- /dev/null +++ b/performance/unit/expression_benchmark.rb @@ -0,0 +1,183 @@ +# frozen_string_literal: true + +require "benchmark/ips" + +# benchmark liquid lexing + +require 'liquid' + +RubyVM::YJIT.enable + +STRING_MARKUPS = [ + "\"foo\"", + "\"fooooooooooo\"", + "\"foooooooooooooooooooooooooooooo\"", + "'foo'", + "'fooooooooooo'", + "'foooooooooooooooooooooooooooooo'", +] + +VARIABLE_MARKUPS = [ + "article", + "article.title", + "article.title.size", + "very_long_variable_name_2024_11_05", + "very_long_variable_name_2024_11_05.size", +] + +NUMBER_MARKUPS = [ + "0", + "35", + "1241891024912849", + "3.5", + "3.51214128409128", + "12381902839.123819283910283", + "123.456.789", + "-123", + "-12.33", + "-405.231", + "-0", + "0", + "0.0", + "0.0000000000000000000000", + "0.00000000001", +] + +RANGE_MARKUPS = [ + "(1..30)", + "(1...30)", + "(1..30..5)", + "(1.0...30.0)", + "(1.........30)", + "(1..foo)", + "(foo..30)", + "(foo..bar)", + "(foo...bar...100)", + "(foo...bar...100.0)", +] + +LITERAL_MARKUPS = [ + nil, + 'nil', + 'null', + '', + 'true', + 'false', + 'blank', + 'empty', +] + +MARKUPS = { + "string" => STRING_MARKUPS, + "literal" => LITERAL_MARKUPS, + "variable" => VARIABLE_MARKUPS, + "number" => NUMBER_MARKUPS, + "range" => RANGE_MARKUPS, +} + +def compare_objects(object_1, object_2) + if object_1.is_a?(Liquid::VariableLookup) && object_2.is_a?(Liquid::VariableLookup) + return false if object_1.name != object_2.name + elsif object_1 != object_2 + return false + end + + true +end + +def compare_range_lookup(expression_1_result, expression_2_result) + return false unless expression_1_result.is_a?(Liquid::RangeLookup) && expression_2_result.is_a?(Liquid::RangeLookup) + + start_obj_1 = expression_1_result.start_obj + start_obj_2 = expression_2_result.start_obj + + return false unless compare_objects(start_obj_1, start_obj_2) + + end_obj_1 = expression_1_result.end_obj + end_obj_2 = expression_2_result.end_obj + + return false unless compare_objects(end_obj_1, end_obj_2) + + true +end + +MARKUPS.values.flatten.each do |markup| + expression_1_result = Liquid::Expression1.parse(markup) + expression_2_result = Liquid::Expression2.parse(markup) + + next if expression_1_result == expression_2_result + + if expression_1_result.is_a?(Liquid::RangeLookup) && expression_2_result.is_a?(Liquid::RangeLookup) + next if compare_range_lookup(expression_1_result, expression_2_result) + end + + warn "Expression1 and Expression2 results are different for markup: #{markup}" + warn "expected: #{expression_1_result}" + warn "got: #{expression_2_result}" + abort +end + +warmed_up = false + +MARKUPS.each do |type, markups| + Benchmark.ips do |x| + if warmed_up + x.config(time: 10, warmup: 5) + warmed_up = true + else + x.config(time: 10) + end + + x.report("Liquid::Expression1#parse: #{type}") do + if Liquid::Expression != Liquid::Expression1 + Liquid.send(:remove_const, :Expression) + Liquid.const_set(:Expression, Liquid::Expression1) + end + + markups.each do |markup| + Liquid::Expression1.parse(markup) + end + end + + x.report("Liquid::Expression2#parse: #{type}") do + if Liquid::Expression != Liquid::Expression2 + Liquid.send(:remove_const, :Expression) + Liquid.const_set(:Expression, Liquid::Expression2) + end + + markups.each do |markup| + Liquid::Expression2.parse(markup) + end + end + + x.compare! + end +end + +Benchmark.ips do |x| + x.config(time: 10) + + x.report("Liquid::Expression1#parse: all") do + if Liquid::Expression != Liquid::Expression1 + Liquid.send(:remove_const, :Expression) + Liquid.const_set(:Expression, Liquid::Expression1) + end + + MARKUPS.values.flatten.each do |markup| + Liquid::Expression1.parse(markup) + end + end + + x.report("Liquid::Expression2#parse: all") do + if Liquid::Expression != Liquid::Expression2 + Liquid.send(:remove_const, :Expression) + Liquid.const_set(:Expression, Liquid::Expression2) + end + + MARKUPS.values.flatten.each do |markup| + Liquid::Expression2.parse(markup) + end + end + + x.compare! +end diff --git a/test/integration/expression_test.rb b/test/integration/expression_test.rb index 80f6f4173..7f451afd1 100644 --- a/test/integration/expression_test.rb +++ b/test/integration/expression_test.rb @@ -13,6 +13,7 @@ def test_string assert_template_result("double quoted", '{{"double quoted"}}') assert_template_result("spaced", "{{ 'spaced' }}") assert_template_result("spaced2", "{{ 'spaced2' }}") + assert_template_result("emoji🔥", "{{ 'emoji🔥' }}") end def test_int @@ -22,6 +23,7 @@ def test_int end def test_float + assert_template_result("-17.42", "{{ -17.42 }}") assert_template_result("2.5", "{{ 2.5 }}") assert_expression_result(1.5, "1.5") end diff --git a/test/integration/parsing_quirks_test.rb b/test/integration/parsing_quirks_test.rb index 48f914635..f5b483fcc 100644 --- a/test/integration/parsing_quirks_test.rb +++ b/test/integration/parsing_quirks_test.rb @@ -134,6 +134,14 @@ def test_contains_in_id def test_incomplete_expression with_error_mode(:lax) do + assert_template_result("false", "{{ false - }}") + assert_template_result("false", "{{ false > }}") + assert_template_result("false", "{{ false < }}") + assert_template_result("false", "{{ false = }}") + assert_template_result("false", "{{ false ! }}") + assert_template_result("false", "{{ false 1 }}") + assert_template_result("false", "{{ false a }}") + assert_template_result("false", "{% liquid assign foo = false -\n%}{{ foo }}") assert_template_result("false", "{% liquid assign foo = false >\n%}{{ foo }}") assert_template_result("false", "{% liquid assign foo = false <\n%}{{ foo }}") diff --git a/test/integration/tags/raw_tag_test.rb b/test/integration/tags/raw_tag_test.rb index 3629f39c2..2ca43d3b2 100644 --- a/test/integration/tags/raw_tag_test.rb +++ b/test/integration/tags/raw_tag_test.rb @@ -16,6 +16,7 @@ def test_output_in_raw assert_template_result('>{{ test }}<', '> {%- raw -%}{{ test }}{%- endraw -%} <') assert_template_result("> inner <", "> {%- raw -%} inner {%- endraw %} <") assert_template_result("> inner <", "> {%- raw -%} inner {%- endraw -%} <") + assert_template_result("{Hello}", "{% raw %}{{% endraw %}Hello{% raw %}}{% endraw %}") end def test_open_tag_in_raw diff --git a/test/unit/tokenizer_unit_test.rb b/test/unit/tokenizer_unit_test.rb index 5e0496e68..76d379d36 100644 --- a/test/unit/tokenizer_unit_test.rb +++ b/test/unit/tokenizer_unit_test.rb @@ -6,6 +6,7 @@ class TokenizerTest < Minitest::Test def test_tokenize_strings assert_equal([' '], tokenize(' ')) assert_equal(['hello world'], tokenize('hello world')) + assert_equal(['{}'], tokenize('{}')) end def test_tokenize_variables @@ -34,6 +35,19 @@ def test_tokenize_with_nil_source_returns_empty_array assert_equal([], tokenize(nil)) end + def test_incomplete_curly_braces + assert_equal(["{{.}", " "], tokenize('{{.} ')) + assert_equal(["{{}", "%}"], tokenize('{{}%}')) + assert_equal(["{{}}", "}"], tokenize('{{}}}')) + end + + def test_unmatching_start_and_end + assert_equal(["{{%}"], tokenize('{{%}')) + assert_equal(["{{%%%}}"], tokenize('{{%%%}}')) + assert_equal(["{%", "}}"], tokenize('{%}}')) + assert_equal(["{%%}", "}"], tokenize('{%%}}')) + end + private def new_tokenizer(source, parse_context: Liquid::ParseContext.new, start_line_number: nil) From 1de602536238eb15aa12ec132262f458e59c5826 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Thu, 14 Nov 2024 00:39:05 -0400 Subject: [PATCH 02/33] decrease the size of Expression2 LRU cache to store 5 themes --- lib/liquid/expression.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 11a695ad9..22cc7227e 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -68,7 +68,7 @@ class Expression2 # Use an atomic group (?>...) to avoid pathological backtracing from # malicious input as described in https://github.com/Shopify/liquid/issues/1357 RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ - CACHE = LruRedux::Cache.new(1_000_000) # most themes would have less than 2,000 unique expression + CACHE = LruRedux::Cache.new(10_000) # most themes would have less than 2,000 unique expression class << self def string_scanner From 0b1dc295ff9604d67369515176d463ba7e4c71ac Mon Sep 17 00:00:00 2001 From: Michael Go Date: Thu, 14 Nov 2024 00:59:01 -0400 Subject: [PATCH 03/33] fix quirky negative sign expression markup parsing --- lib/liquid.rb | 2 +- lib/liquid/expression.rb | 3 ++- test/integration/expression_test.rb | 14 +++++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/liquid.rb b/lib/liquid.rb index 0a970005d..367fc3c43 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -68,7 +68,6 @@ module Liquid require 'liquid/errors' require 'liquid/interrupts' require 'liquid/strainer_template' -require 'liquid/expression' require 'liquid/context' require 'liquid/tag' require 'liquid/block_body' @@ -77,6 +76,7 @@ module Liquid require 'liquid/variable_lookup' require 'liquid/range_lookup' require 'liquid/resource_limits' +require 'liquid/expression' require 'liquid/template' require 'liquid/condition' require 'liquid/utils' diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 22cc7227e..e10f97939 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -57,7 +57,8 @@ class Expression2 'true' => true, 'false' => false, 'blank' => '', - 'empty' => '' + 'empty' => '', + '-' => VariableLookup.parse("-") }.freeze DOT = ".".ord diff --git a/test/integration/expression_test.rb b/test/integration/expression_test.rb index 7f451afd1..dc57ad286 100644 --- a/test/integration/expression_test.rb +++ b/test/integration/expression_test.rb @@ -23,7 +23,7 @@ def test_int end def test_float - assert_template_result("-17.42", "{{ -17.42 }}") + assert_template_result("-17.42", "{{ -17.42 }}") assert_template_result("2.5", "{{ 2.5 }}") assert_expression_result(1.5, "1.5") end @@ -42,6 +42,18 @@ def test_range ) end + def test_quirky_negative_sign_expression_markup + result = Expression.parse("-") + assert(result.is_a?(VariableLookup)) + assert_equal("-", result.name) + + # for this template, the expression markup is "-" + assert_template_result( + "", + "{{ - 'theme.css' - }}", + ) + end + private def assert_expression_result(expect, markup, **assigns) From 7c8a269b4d5ef134834019bcc026b2043512669e Mon Sep 17 00:00:00 2001 From: Michael Go Date: Thu, 14 Nov 2024 15:26:43 -0400 Subject: [PATCH 04/33] fix cycle tag not resetting --- lib/liquid/tags/cycle.rb | 8 ++++- test/integration/tags/cycle_tag_test.rb | 48 +++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 test/integration/tags/cycle_tag_test.rb diff --git a/lib/liquid/tags/cycle.rb b/lib/liquid/tags/cycle.rb index 06d788de3..c2d94d5f4 100644 --- a/lib/liquid/tags/cycle.rb +++ b/lib/liquid/tags/cycle.rb @@ -68,7 +68,13 @@ def render_to_output_buffer(context, output) def variables_from_string(markup) markup.split(',').collect do |var| var =~ /\s*(#{QuotedFragment})\s*/o - Regexp.last_match(1) ? parse_expression(Regexp.last_match(1)) : nil + next unless Regexp.last_match(1) + + # Expression Parser returns cached objects, and we need to dup them to + # start the cycle over for each new cycle call. + # Liquid-C does not have a cache, so we don't need to dup the object. + var = parse_expression(Regexp.last_match(1)) + var.is_a?(VariableLookup) ? var.dup : var end.compact end diff --git a/test/integration/tags/cycle_tag_test.rb b/test/integration/tags/cycle_tag_test.rb new file mode 100644 index 000000000..a034db17e --- /dev/null +++ b/test/integration/tags/cycle_tag_test.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require 'test_helper' + +class CycleTagTest < Minitest::Test + def test_simple_cycle + template = <<~LIQUID + {%- cycle '1', '2', '3' -%} + {%- cycle '1', '2', '3' -%} + {%- cycle '1', '2', '3' -%} + LIQUID + + assert_template_result("123", template) + end + + def test_simple_cycle_inside_for_loop + template = <<~LIQUID + {%- for i in (1..3) -%} + {% cycle '1', '2', '3' %} + {%- endfor -%} + LIQUID + + assert_template_result("123", template) + end + + def test_cycle_with_variables_inside_for_loop + template = <<~LIQUID + {%- assign a = 1 -%} + {%- assign b = 2 -%} + {%- assign c = 3 -%} + {%- for i in (1..3) -%} + {% cycle a, b, c %} + {%- endfor -%} + LIQUID + + assert_template_result("123", template) + end + + def test_cycle_tag_always_resets_cycle + template = <<~LIQUID + {%- assign a = "1" -%} + {%- cycle a, "2" -%} + {%- cycle a, "2" -%} + LIQUID + + assert_template_result("11", template) + end +end From 92fa33419248d5b100fcc6c6e15a172c9bdb490c Mon Sep 17 00:00:00 2001 From: Michael Go Date: Thu, 14 Nov 2024 16:51:45 -0400 Subject: [PATCH 05/33] don't cache string expressions to be more perfomant --- lib/liquid/expression.rb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index e10f97939..d860e20b9 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -81,16 +81,18 @@ def parse(markup) markup = markup.strip # markup can be a frozen string + if (markup.start_with?('"') && markup.end_with?('"')) || + (markup.start_with?("'") && markup.end_with?("'")) + return markup[1..-2] + end + return CACHE[markup] if CACHE.key?(markup) CACHE[markup] = inner_parse(markup) end def inner_parse(markup) - if (markup.start_with?('"') && markup.end_with?('"')) || - (markup.start_with?("'") && markup.end_with?("'")) - return markup[1..-2] - elsif (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX + if (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX return RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2)) end From a672f3836ccf7008966e2db873e355a5749ecb11 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Thu, 14 Nov 2024 17:50:46 -0400 Subject: [PATCH 06/33] don't cache literals for Expression parser --- lib/liquid/expression.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index d860e20b9..c9a50fd15 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -84,6 +84,8 @@ def parse(markup) if (markup.start_with?('"') && markup.end_with?('"')) || (markup.start_with?("'") && markup.end_with?("'")) return markup[1..-2] + elsif LITERALS.key?(markup) + return LITERALS[markup] end return CACHE[markup] if CACHE.key?(markup) @@ -96,8 +98,6 @@ def inner_parse(markup) return RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2)) end - return LITERALS[markup] if LITERALS.key?(markup) - if (num = parse_number(markup)) num else From 19adfbd86393b4baecb21ecb0d636d5dcc321a69 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 15 Nov 2024 17:24:48 -0400 Subject: [PATCH 07/33] don't force strscan 3.1 --- Gemfile | 1 - liquid.gemspec | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 2053d5e0a..e74d19ec7 100644 --- a/Gemfile +++ b/Gemfile @@ -25,5 +25,4 @@ group :test do gem 'rubocop-performance', require: false end -gem "strscan", ">= 3.1" gem "lru_redux" diff --git a/liquid.gemspec b/liquid.gemspec index 9f7e1923a..c692fc1d9 100644 --- a/liquid.gemspec +++ b/liquid.gemspec @@ -30,6 +30,7 @@ Gem::Specification.new do |s| s.add_dependency("strscan") s.add_dependency("bigdecimal") + s.add_dependency("lru_redux") s.add_development_dependency('rake', '~> 13.0') s.add_development_dependency('minitest') From eff5c5de8ec0a8a24e99e11ddc460ea96e61ab23 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 15 Nov 2024 17:24:52 -0400 Subject: [PATCH 08/33] code clean up --- lib/liquid/lexer.rb | 2 +- lib/liquid/tokenizer.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index c2adec78f..4e8d3d6ef 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -164,7 +164,7 @@ def string_scanner end def initialize(input) - @ss = Lexer2.string_scanner + @ss = self.class.string_scanner @ss.string = input end diff --git a/lib/liquid/tokenizer.rb b/lib/liquid/tokenizer.rb index 0bb05c73d..8920a5145 100644 --- a/lib/liquid/tokenizer.rb +++ b/lib/liquid/tokenizer.rb @@ -91,7 +91,7 @@ def tokenize if @for_liquid_tag @tokens = @source.split("\n") else - @ss = Tokenizer2.string_scanner + @ss = self.class.string_scanner @ss.string = @source @tokens << shift_normal until @ss.eos? end From 3c16c27ee1b4ee4b9535fe25096af08865cc646d Mon Sep 17 00:00:00 2001 From: Michael Go Date: Sun, 17 Nov 2024 16:36:04 -0400 Subject: [PATCH 09/33] add StringScannerPool for thread safety --- lib/liquid.rb | 1 + lib/liquid/expression.rb | 9 ++--- lib/liquid/lexer.rb | 58 ++++++++++++++----------------- lib/liquid/string_scanner_pool.rb | 23 ++++++++++++ lib/liquid/tokenizer.rb | 12 ++----- 5 files changed, 57 insertions(+), 46 deletions(-) create mode 100644 lib/liquid/string_scanner_pool.rb diff --git a/lib/liquid.rb b/lib/liquid.rb index 367fc3c43..63c087bee 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -49,6 +49,7 @@ module Liquid require "liquid/version" require "liquid/deprecations" require "liquid/const" +require "liquid/string_scanner_pool" require 'liquid/standardfilters' require 'liquid/file_system' require 'liquid/parser_switching' diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index c9a50fd15..ea8c68211 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -72,10 +72,6 @@ class Expression2 CACHE = LruRedux::Cache.new(10_000) # most themes would have less than 2,000 unique expression class << self - def string_scanner - @ss ||= StringScanner.new("") - end - def parse(markup) return unless markup @@ -106,8 +102,7 @@ def inner_parse(markup) end def parse_number(markup) - ss = string_scanner - ss.string = markup + ss = StringScannerPool.pop(markup) is_integer = true last_dot_pos = nil @@ -147,6 +142,8 @@ def parse_number(markup) # we should never reach this point false end + ensure + StringScannerPool.release(ss) end end end diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 4e8d3d6ef..3dae267a8 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -157,40 +157,34 @@ class Lexer2 table.freeze end - class << self - def string_scanner - @string_scanner ||= StringScanner.new("") - end - end - def initialize(input) - @ss = self.class.string_scanner - @ss.string = input + @input = input end # rubocop:disable Metrics/BlockNesting def tokenize + ss = StringScannerPool.pop(@input) @output = [] - until @ss.eos? - @ss.skip(WHITESPACE_OR_NOTHING) + until ss.eos? + ss.skip(WHITESPACE_OR_NOTHING) - break if @ss.eos? + break if ss.eos? - start_pos = @ss.pos - peeked = @ss.peek_byte + start_pos = ss.pos + peeked = ss.peek_byte if (special = SPECIAL_TABLE[peeked]) - @ss.scan_byte + ss.scan_byte # Special case for ".." - if special == DOT && @ss.peek_byte == DOT_ORD - @ss.scan_byte + if special == DOT && ss.peek_byte == DOT_ORD + ss.scan_byte @output << DOTDOT elsif special == DASH # Special case for negative numbers - if (peeked_byte = @ss.peek_byte) && NUMBER_TABLE[peeked_byte] - @ss.pos -= 1 - @output << [:number, @ss.scan(NUMBER_LITERAL)] + if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte] + ss.pos -= 1 + @output << [:number, ss.scan(NUMBER_LITERAL)] else @output << special end @@ -198,25 +192,25 @@ def tokenize @output << special end elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked]) - @ss.scan_byte - if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte]) + ss.scan_byte + if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) @output << found - @ss.scan_byte + ss.scan_byte else - raise_syntax_error(start_pos) + raise_syntax_error(start_pos, ss) end elsif (sub_table = COMPARISON_JUMP_TABLE[peeked]) - @ss.scan_byte - if (peeked_byte = @ss.peek_byte) && (found = sub_table[peeked_byte]) + ss.scan_byte + if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) @output << found - @ss.scan_byte + ss.scan_byte else @output << SINGLE_COMPARISON_TOKENS[peeked] end else type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked] - if type && (t = @ss.scan(pattern)) + if type && (t = ss.scan(pattern)) # Special case for "contains" @output << if type == :id && t == "contains" && @output.last&.first != :dot COMPARISON_CONTAINS @@ -224,19 +218,21 @@ def tokenize [type, t] end else - raise_syntax_error(start_pos) + raise_syntax_error(start_pos, ss) end end end # rubocop:enable Metrics/BlockNesting @output << EOS + ensure + StringScannerPool.release(ss) end - def raise_syntax_error(start_pos) - @ss.pos = start_pos + def raise_syntax_error(start_pos, ss) + ss.pos = start_pos # the character could be a UTF-8 character, use getch to get all the bytes - raise SyntaxError, "Unexpected character #{@ss.getch}" + raise SyntaxError, "Unexpected character #{ss.getch}" end end diff --git a/lib/liquid/string_scanner_pool.rb b/lib/liquid/string_scanner_pool.rb new file mode 100644 index 000000000..4114d7a2d --- /dev/null +++ b/lib/liquid/string_scanner_pool.rb @@ -0,0 +1,23 @@ +module Liquid + class StringScannerPool + class << self + def pop(input) + @ss_pool ||= [StringScanner.new("")] * 5 + + if @ss_pool.empty? + StringScanner.new(input) + else + ss = @ss_pool.pop + ss.string = input + ss + end + end + + def release(ss) + binding.irb if ss.nil? + @ss_pool ||= [] + @ss_pool << ss + end + end + end +end diff --git a/lib/liquid/tokenizer.rb b/lib/liquid/tokenizer.rb index 8920a5145..1ed3edf37 100644 --- a/lib/liquid/tokenizer.rb +++ b/lib/liquid/tokenizer.rb @@ -56,12 +56,6 @@ class Tokenizer2 CLOSE_CURLEY = "}".ord PERCENTAGE = "%".ord - class << self - def string_scanner - @string_scanner ||= StringScanner.new("") - end - end - def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false) @line_number = line_number || (line_numbers ? 1 : nil) @for_liquid_tag = for_liquid_tag @@ -91,13 +85,13 @@ def tokenize if @for_liquid_tag @tokens = @source.split("\n") else - @ss = self.class.string_scanner - @ss.string = @source + @ss = StringScannerPool.pop(@source) @tokens << shift_normal until @ss.eos? end - @ss = nil @source = nil + ensure + StringScannerPool.release(@ss) if @ss end def shift_normal From 002e4caea7f08182cdad3306b2cac77712e6d8d1 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Sun, 17 Nov 2024 16:50:59 -0400 Subject: [PATCH 10/33] refactor Lexer to be static class function --- lib/liquid/lexer.rb | 170 +++++++++++++++--------------- lib/liquid/parser.rb | 3 +- lib/liquid/string_scanner_pool.rb | 5 +- test/unit/lexer_unit_test.rb | 2 +- 4 files changed, 88 insertions(+), 92 deletions(-) diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 3dae267a8..d9a073aa8 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -25,38 +25,37 @@ class Lexer1 COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=\s)/ WHITESPACE_OR_NOTHING = /\s*/ - def initialize(input) - @ss = StringScanner.new(input) - end - - def tokenize - @output = [] - - until @ss.eos? - @ss.skip(WHITESPACE_OR_NOTHING) - break if @ss.eos? - tok = if (t = @ss.scan(COMPARISON_OPERATOR)) - [:comparison, t] - elsif (t = @ss.scan(STRING_LITERAL)) - [:string, t] - elsif (t = @ss.scan(NUMBER_LITERAL)) - [:number, t] - elsif (t = @ss.scan(IDENTIFIER)) - [:id, t] - elsif (t = @ss.scan(DOTDOT)) - [:dotdot, t] - else - c = @ss.getch - if (s = SPECIALS[c]) - [s, c] + class << self + def tokenize(input) + ss = StringScanner.new(input) + output = [] + + until ss.eos? + ss.skip(WHITESPACE_OR_NOTHING) + break if ss.eos? + tok = if (t = ss.scan(COMPARISON_OPERATOR)) + [:comparison, t] + elsif (t = ss.scan(STRING_LITERAL)) + [:string, t] + elsif (t = ss.scan(NUMBER_LITERAL)) + [:number, t] + elsif (t = ss.scan(IDENTIFIER)) + [:id, t] + elsif (t = ss.scan(DOTDOT)) + [:dotdot, t] else - raise SyntaxError, "Unexpected character #{c}" + c = ss.getch + if (s = SPECIALS[c]) + [s, c] + else + raise SyntaxError, "Unexpected character #{c}" + end end + output << tok end - @output << tok - end - @output << [:end_of_string] + output << [:end_of_string] + end end end @@ -157,82 +156,79 @@ class Lexer2 table.freeze end - def initialize(input) - @input = input - end - # rubocop:disable Metrics/BlockNesting - def tokenize - ss = StringScannerPool.pop(@input) - @output = [] + class << self + def tokenize(input) + ss = StringScannerPool.pop(input) + output = [] - until ss.eos? - ss.skip(WHITESPACE_OR_NOTHING) + until ss.eos? + ss.skip(WHITESPACE_OR_NOTHING) - break if ss.eos? + break if ss.eos? - start_pos = ss.pos - peeked = ss.peek_byte + start_pos = ss.pos + peeked = ss.peek_byte - if (special = SPECIAL_TABLE[peeked]) - ss.scan_byte - # Special case for ".." - if special == DOT && ss.peek_byte == DOT_ORD + if (special = SPECIAL_TABLE[peeked]) ss.scan_byte - @output << DOTDOT - elsif special == DASH - # Special case for negative numbers - if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte] - ss.pos -= 1 - @output << [:number, ss.scan(NUMBER_LITERAL)] + # Special case for ".." + if special == DOT && ss.peek_byte == DOT_ORD + ss.scan_byte + output << DOTDOT + elsif special == DASH + # Special case for negative numbers + if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte] + ss.pos -= 1 + output << [:number, ss.scan(NUMBER_LITERAL)] + else + output << special + end else - @output << special + output << special end - else - @output << special - end - elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked]) - ss.scan_byte - if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) - @output << found + elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked]) ss.scan_byte - else - raise_syntax_error(start_pos, ss) - end - elsif (sub_table = COMPARISON_JUMP_TABLE[peeked]) - ss.scan_byte - if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) - @output << found + if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) + output << found + ss.scan_byte + else + raise_syntax_error(start_pos, ss) + end + elsif (sub_table = COMPARISON_JUMP_TABLE[peeked]) ss.scan_byte - else - @output << SINGLE_COMPARISON_TOKENS[peeked] - end - else - type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked] - - if type && (t = ss.scan(pattern)) - # Special case for "contains" - @output << if type == :id && t == "contains" && @output.last&.first != :dot - COMPARISON_CONTAINS + if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte]) + output << found + ss.scan_byte else - [type, t] + output << SINGLE_COMPARISON_TOKENS[peeked] end else - raise_syntax_error(start_pos, ss) + type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked] + + if type && (t = ss.scan(pattern)) + # Special case for "contains" + output << if type == :id && t == "contains" && output.last&.first != :dot + COMPARISON_CONTAINS + else + [type, t] + end + else + raise_syntax_error(start_pos, ss) + end end end + # rubocop:enable Metrics/BlockNesting + output << EOS + ensure + StringScannerPool.release(ss) end - # rubocop:enable Metrics/BlockNesting - @output << EOS - ensure - StringScannerPool.release(ss) - end - - def raise_syntax_error(start_pos, ss) - ss.pos = start_pos - # the character could be a UTF-8 character, use getch to get all the bytes - raise SyntaxError, "Unexpected character #{ss.getch}" + def raise_syntax_error(start_pos, ss) + ss.pos = start_pos + # the character could be a UTF-8 character, use getch to get all the bytes + raise SyntaxError, "Unexpected character #{ss.getch}" + end end end diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb index 8560fc58e..ed18909a2 100644 --- a/lib/liquid/parser.rb +++ b/lib/liquid/parser.rb @@ -3,8 +3,7 @@ module Liquid class Parser def initialize(input) - l = Lexer.new(input) - @tokens = l.tokenize + @tokens = Lexer.tokenize(input) @p = 0 # pointer to current location end diff --git a/lib/liquid/string_scanner_pool.rb b/lib/liquid/string_scanner_pool.rb index 4114d7a2d..6ebc49212 100644 --- a/lib/liquid/string_scanner_pool.rb +++ b/lib/liquid/string_scanner_pool.rb @@ -1,8 +1,10 @@ +# frozen_string_literal: true + module Liquid class StringScannerPool class << self def pop(input) - @ss_pool ||= [StringScanner.new("")] * 5 + @ss_pool ||= 5.times.each_with_object([]) { |_i, arr| arr << StringScanner.new("") } if @ss_pool.empty? StringScanner.new(input) @@ -14,7 +16,6 @@ def pop(input) end def release(ss) - binding.irb if ss.nil? @ss_pool ||= [] @ss_pool << ss end diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb index 26a25b629..d4f22fb60 100644 --- a/test/unit/lexer_unit_test.rb +++ b/test/unit/lexer_unit_test.rb @@ -134,6 +134,6 @@ def test_tokenize_incomplete_expression private def tokenize(input) - Lexer.new(input).tokenize + Lexer.tokenize(input) end end From 7c592c1c008929d26f1a26b201a2719c01e99026 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Sun, 17 Nov 2024 23:36:10 -0400 Subject: [PATCH 11/33] store StringScanner in ParseContext and reuse it through parsing --- lib/liquid.rb | 1 - lib/liquid/context.rb | 6 +++- lib/liquid/expression.rb | 44 ++++++++++++++--------------- lib/liquid/lexer.rb | 10 +++---- lib/liquid/parse_context.rb | 21 +++++++++++--- lib/liquid/parser.rb | 4 +-- lib/liquid/range_lookup.rb | 6 ++-- lib/liquid/string_scanner_pool.rb | 24 ---------------- lib/liquid/tags/for.rb | 2 +- lib/liquid/tags/if.rb | 2 +- lib/liquid/tokenizer.rb | 32 +++++++++++++++------ lib/liquid/variable.rb | 2 +- lib/liquid/variable_lookup.rb | 16 +++++++---- performance/theme_runner.rb | 7 ++++- test/integration/expression_test.rb | 2 +- test/unit/lexer_unit_test.rb | 2 +- test/unit/parser_unit_test.rb | 26 ++++++++++------- test/unit/tag_unit_test.rb | 17 ++++++++--- 18 files changed, 126 insertions(+), 98 deletions(-) delete mode 100644 lib/liquid/string_scanner_pool.rb diff --git a/lib/liquid.rb b/lib/liquid.rb index 63c087bee..367fc3c43 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -49,7 +49,6 @@ module Liquid require "liquid/version" require "liquid/deprecations" require "liquid/const" -require "liquid/string_scanner_pool" require 'liquid/standardfilters' require 'liquid/file_system' require 'liquid/parser_switching' diff --git a/lib/liquid/context.rb b/lib/liquid/context.rb index 19daaeb73..1ef272d46 100644 --- a/lib/liquid/context.rb +++ b/lib/liquid/context.rb @@ -40,6 +40,10 @@ def initialize(environments = {}, outer_scope = {}, registers = {}, rethrow_erro @global_filter = nil @disabled_tags = {} + # Instead of constructing new StringScanner objects for each Expression parse, + # we recycle the same one. + @string_scanner = StringScanner.new("") + @registers.static[:cached_partials] ||= {} @registers.static[:file_system] ||= environment.file_system @registers.static[:template_factory] ||= Liquid::TemplateFactory.new @@ -176,7 +180,7 @@ def []=(key, value) # Example: # products == empty #=> products.empty? def [](expression) - evaluate(Expression.parse(expression)) + evaluate(Expression.parse(expression, @string_scanner)) end def key?(key) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index ea8c68211..ee98d7577 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -22,7 +22,7 @@ class Expression1 # malicious input as described in https://github.com/Shopify/liquid/issues/1357 RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ - def self.parse(markup) + def self.parse(markup, _ss = nil) return nil unless markup markup = markup.strip @@ -35,14 +35,14 @@ def self.parse(markup) when INTEGERS_REGEX Regexp.last_match(1).to_i when RANGES_REGEX - RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2)) + RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2), nil) when FLOATS_REGEX Regexp.last_match(1).to_f else if LITERALS.key?(markup) LITERALS[markup] else - VariableLookup.parse(markup) + VariableLookup.parse(markup, nil) end end end @@ -58,7 +58,7 @@ class Expression2 'false' => false, 'blank' => '', 'empty' => '', - '-' => VariableLookup.parse("-") + '-' => VariableLookup.parse("-", nil), }.freeze DOT = ".".ord @@ -72,7 +72,7 @@ class Expression2 CACHE = LruRedux::Cache.new(10_000) # most themes would have less than 2,000 unique expression class << self - def parse(markup) + def parse(markup, ss = StringScanner.new("")) return unless markup markup = markup.strip # markup can be a frozen string @@ -86,33 +86,36 @@ def parse(markup) return CACHE[markup] if CACHE.key?(markup) - CACHE[markup] = inner_parse(markup) + CACHE[markup] = inner_parse(markup, ss) end - def inner_parse(markup) + def inner_parse(markup, ss) if (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX - return RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2)) + return RangeLookup.parse( + Regexp.last_match(1), + Regexp.last_match(2), + ss, + ) end - if (num = parse_number(markup)) + if (num = parse_number(markup, ss)) num else - VariableLookup.parse(markup) + VariableLookup.parse(markup, ss) end end - def parse_number(markup) - ss = StringScannerPool.pop(markup) - - is_integer = true - last_dot_pos = nil - num_end_pos = nil - + def parse_number(markup, ss) + ss.string = markup # the first byte must be a digit, a period, or a dash byte = ss.scan_byte return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE) + is_integer = true + last_dot_pos = nil + num_end_pos = nil + while (byte = ss.scan_byte) return false if byte != DOT && (byte < ZERO || byte > NINE) @@ -136,14 +139,9 @@ def parse_number(markup) if num_end_pos # number ends with a number "123.123" markup.byteslice(0, num_end_pos).to_f - elsif last_dot_pos - markup.byteslice(0, last_dot_pos).to_f else - # we should never reach this point - false + markup.byteslice(0, last_dot_pos).to_f end - ensure - StringScannerPool.release(ss) end end end diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index d9a073aa8..0ab9f3ca3 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -26,8 +26,8 @@ class Lexer1 WHITESPACE_OR_NOTHING = /\s*/ class << self - def tokenize(input) - ss = StringScanner.new(input) + def tokenize(input, ss = StringScanner.new("")) + ss.string = input output = [] until ss.eos? @@ -158,8 +158,8 @@ class Lexer2 # rubocop:disable Metrics/BlockNesting class << self - def tokenize(input) - ss = StringScannerPool.pop(input) + def tokenize(input, ss) + ss.string = input output = [] until ss.eos? @@ -220,8 +220,6 @@ def tokenize(input) end # rubocop:enable Metrics/BlockNesting output << EOS - ensure - StringScannerPool.release(ss) end def raise_syntax_error(start_pos, ss) diff --git a/lib/liquid/parse_context.rb b/lib/liquid/parse_context.rb index 7bd5418d5..f66e86be3 100644 --- a/lib/liquid/parse_context.rb +++ b/lib/liquid/parse_context.rb @@ -3,7 +3,7 @@ module Liquid class ParseContext attr_accessor :locale, :line_number, :trim_whitespace, :depth - attr_reader :partial, :warnings, :error_mode, :environment + attr_reader :partial, :warnings, :error_mode, :environment, :string_scanner def initialize(options = Const::EMPTY_HASH) @environment = options.fetch(:environment, Environment.default) @@ -12,6 +12,10 @@ def initialize(options = Const::EMPTY_HASH) @locale = @template_options[:locale] ||= I18n.new @warnings = [] + # constructing new StringScanner in Lexer, Tokenizer, etc is expensive + # This StringScanner will be shared by all of them + @string_scanner = StringScanner.new("") + self.depth = 0 self.partial = false end @@ -24,12 +28,21 @@ def new_block_body Liquid::BlockBody.new end - def new_tokenizer(markup, start_line_number: nil, for_liquid_tag: false) - Tokenizer.new(markup, line_number: start_line_number, for_liquid_tag: for_liquid_tag) + def new_parser(input) + Parser.new(input, @string_scanner) + end + + def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false) + Tokenizer.new( + source: source, + string_scanner: @string_scanner, + line_number: start_line_number, + for_liquid_tag: for_liquid_tag, + ) end def parse_expression(markup) - Expression.parse(markup) + Expression.parse(markup, string_scanner) end def partial=(value) diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb index ed18909a2..a4f781d70 100644 --- a/lib/liquid/parser.rb +++ b/lib/liquid/parser.rb @@ -2,8 +2,8 @@ module Liquid class Parser - def initialize(input) - @tokens = Lexer.tokenize(input) + def initialize(input, string_scanner) + @tokens = Lexer.tokenize(input, string_scanner) @p = 0 # pointer to current location end diff --git a/lib/liquid/range_lookup.rb b/lib/liquid/range_lookup.rb index fd208a676..763717240 100644 --- a/lib/liquid/range_lookup.rb +++ b/lib/liquid/range_lookup.rb @@ -2,9 +2,9 @@ module Liquid class RangeLookup - def self.parse(start_markup, end_markup) - start_obj = Expression.parse(start_markup) - end_obj = Expression.parse(end_markup) + def self.parse(start_markup, end_markup, string_scanner) + start_obj = Expression.parse(start_markup, string_scanner) + end_obj = Expression.parse(end_markup, string_scanner) if start_obj.respond_to?(:evaluate) || end_obj.respond_to?(:evaluate) new(start_obj, end_obj) else diff --git a/lib/liquid/string_scanner_pool.rb b/lib/liquid/string_scanner_pool.rb deleted file mode 100644 index 6ebc49212..000000000 --- a/lib/liquid/string_scanner_pool.rb +++ /dev/null @@ -1,24 +0,0 @@ -# frozen_string_literal: true - -module Liquid - class StringScannerPool - class << self - def pop(input) - @ss_pool ||= 5.times.each_with_object([]) { |_i, arr| arr << StringScanner.new("") } - - if @ss_pool.empty? - StringScanner.new(input) - else - ss = @ss_pool.pop - ss.string = input - ss - end - end - - def release(ss) - @ss_pool ||= [] - @ss_pool << ss - end - end - end -end diff --git a/lib/liquid/tags/for.rb b/lib/liquid/tags/for.rb index 0dda3081d..27af15bf1 100644 --- a/lib/liquid/tags/for.rb +++ b/lib/liquid/tags/for.rb @@ -88,7 +88,7 @@ def lax_parse(markup) end def strict_parse(markup) - p = Parser.new(markup) + p = @parse_context.new_parser(markup) @variable_name = p.consume(:id) raise SyntaxError, options[:locale].t("errors.syntax.for_invalid_in") unless p.id?('in') diff --git a/lib/liquid/tags/if.rb b/lib/liquid/tags/if.rb index bcf250a11..040fecb84 100644 --- a/lib/liquid/tags/if.rb +++ b/lib/liquid/tags/if.rb @@ -102,7 +102,7 @@ def lax_parse(markup) end def strict_parse(markup) - p = Parser.new(markup) + p = @parse_context.new_parser(markup) condition = parse_binary_comparisons(p) p.consume(:end_of_string) condition diff --git a/lib/liquid/tokenizer.rb b/lib/liquid/tokenizer.rb index 1ed3edf37..dcfc8fd4a 100644 --- a/lib/liquid/tokenizer.rb +++ b/lib/liquid/tokenizer.rb @@ -6,7 +6,13 @@ module Liquid class Tokenizer1 attr_reader :line_number, :for_liquid_tag - def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false) + def initialize( + source:, + string_scanner:, # this is not used + line_numbers: false, + line_number: nil, + for_liquid_tag: false + ) @source = source.to_s.to_str @line_number = line_number || (line_numbers ? 1 : nil) @for_liquid_tag = for_liquid_tag @@ -56,12 +62,22 @@ class Tokenizer2 CLOSE_CURLEY = "}".ord PERCENTAGE = "%".ord - def initialize(source, line_numbers = false, line_number: nil, for_liquid_tag: false) - @line_number = line_number || (line_numbers ? 1 : nil) + def initialize( + source:, + string_scanner:, + line_numbers: false, + line_number: nil, + for_liquid_tag: false + ) + @line_number = line_number || (line_numbers ? 1 : nil) @for_liquid_tag = for_liquid_tag - @source = source - @offset = 0 - @tokens = [] + @source = source + @offset = 0 + @tokens = [] + + @ss = string_scanner + @ss.string = @source + tokenize end @@ -85,13 +101,11 @@ def tokenize if @for_liquid_tag @tokens = @source.split("\n") else - @ss = StringScannerPool.pop(@source) @tokens << shift_normal until @ss.eos? end @source = nil - ensure - StringScannerPool.release(@ss) if @ss + @ss = nil end def shift_normal diff --git a/lib/liquid/variable.rb b/lib/liquid/variable.rb index 2bcf871ea..090690559 100644 --- a/lib/liquid/variable.rb +++ b/lib/liquid/variable.rb @@ -61,7 +61,7 @@ def lax_parse(markup) def strict_parse(markup) @filters = [] - p = Parser.new(markup) + p = @parse_context.new_parser(markup) return if p.look(:end_of_string) diff --git a/lib/liquid/variable_lookup.rb b/lib/liquid/variable_lookup.rb index 1ade30ef9..f544b3bc4 100644 --- a/lib/liquid/variable_lookup.rb +++ b/lib/liquid/variable_lookup.rb @@ -6,16 +6,19 @@ class VariableLookup attr_reader :name, :lookups - def self.parse(markup) - new(markup) + def self.parse(markup, string_scanner) + new(markup, string_scanner) end - def initialize(markup) + def initialize(markup, string_scanner = StringScanner.new("")) lookups = markup.scan(VariableParser) name = lookups.shift if name&.start_with?('[') && name&.end_with?(']') - name = Expression.parse(name[1..-2]) + name = Expression.parse( + name[1..-2], + string_scanner, + ) end @name = name @@ -25,7 +28,10 @@ def initialize(markup) @lookups.each_index do |i| lookup = lookups[i] if lookup&.start_with?('[') && lookup&.end_with?(']') - lookups[i] = Expression.parse(lookup[1..-2]) + lookups[i] = Expression.parse( + lookup[1..-2], + string_scanner, + ) elsif COMMAND_METHODS.include?(lookup) @command_flags |= 1 << i end diff --git a/performance/theme_runner.rb b/performance/theme_runner.rb index c158f8778..469503670 100644 --- a/performance/theme_runner.rb +++ b/performance/theme_runner.rb @@ -50,8 +50,13 @@ def compile # `tokenize` will just test the tokenizen portion of liquid without any templates def tokenize + ss = StringScanner.new("") @tests.each do |test_hash| - tokenizer = Liquid::Tokenizer.new(test_hash[:liquid], true) + tokenizer = Liquid::Tokenizer.new( + source: test_hash[:liquid], + string_scanner: ss, + line_numbers: true, + ) while tokenizer.shift; end end end diff --git a/test/integration/expression_test.rb b/test/integration/expression_test.rb index dc57ad286..6d29fc792 100644 --- a/test/integration/expression_test.rb +++ b/test/integration/expression_test.rb @@ -43,7 +43,7 @@ def test_range end def test_quirky_negative_sign_expression_markup - result = Expression.parse("-") + result = Expression.parse("-", nil) assert(result.is_a?(VariableLookup)) assert_equal("-", result.name) diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb index d4f22fb60..e6f161288 100644 --- a/test/unit/lexer_unit_test.rb +++ b/test/unit/lexer_unit_test.rb @@ -134,6 +134,6 @@ def test_tokenize_incomplete_expression private def tokenize(input) - Lexer.tokenize(input) + Lexer.tokenize(input, StringScanner.new("")) end end diff --git a/test/unit/parser_unit_test.rb b/test/unit/parser_unit_test.rb index 130b8b793..789c5b663 100644 --- a/test/unit/parser_unit_test.rb +++ b/test/unit/parser_unit_test.rb @@ -6,20 +6,20 @@ class ParserUnitTest < Minitest::Test include Liquid def test_consume - p = Parser.new("wat: 7") + p = new_parser("wat: 7") assert_equal('wat', p.consume(:id)) assert_equal(':', p.consume(:colon)) assert_equal('7', p.consume(:number)) end def test_jump - p = Parser.new("wat: 7") + p = new_parser("wat: 7") p.jump(2) assert_equal('7', p.consume(:number)) end def test_consume? - p = Parser.new("wat: 7") + p = new_parser("wat: 7") assert_equal('wat', p.consume?(:id)) assert_equal(false, p.consume?(:dot)) assert_equal(':', p.consume(:colon)) @@ -27,7 +27,7 @@ def test_consume? end def test_id? - p = Parser.new("wat 6 Peter Hegemon") + p = new_parser("wat 6 Peter Hegemon") assert_equal('wat', p.id?('wat')) assert_equal(false, p.id?('endgame')) assert_equal('6', p.consume(:number)) @@ -36,7 +36,7 @@ def test_id? end def test_look - p = Parser.new("wat 6 Peter Hegemon") + p = new_parser("wat 6 Peter Hegemon") assert_equal(true, p.look(:id)) assert_equal('wat', p.consume(:id)) assert_equal(false, p.look(:comparison)) @@ -46,12 +46,12 @@ def test_look end def test_expressions - p = Parser.new("hi.there hi?[5].there? hi.there.bob") + p = new_parser("hi.there hi?[5].there? hi.there.bob") assert_equal('hi.there', p.expression) assert_equal('hi?[5].there?', p.expression) assert_equal('hi.there.bob', p.expression) - p = Parser.new("567 6.0 'lol' \"wut\"") + p = new_parser("567 6.0 'lol' \"wut\"") assert_equal('567', p.expression) assert_equal('6.0', p.expression) assert_equal("'lol'", p.expression) @@ -59,7 +59,7 @@ def test_expressions end def test_ranges - p = Parser.new("(5..7) (1.5..9.6) (young..old) (hi[5].wat..old)") + p = new_parser("(5..7) (1.5..9.6) (young..old) (hi[5].wat..old)") assert_equal('(5..7)', p.expression) assert_equal('(1.5..9.6)', p.expression) assert_equal('(young..old)', p.expression) @@ -67,7 +67,7 @@ def test_ranges end def test_arguments - p = Parser.new("filter: hi.there[5], keyarg: 7") + p = new_parser("filter: hi.there[5], keyarg: 7") assert_equal('filter', p.consume(:id)) assert_equal(':', p.consume(:colon)) assert_equal('hi.there[5]', p.argument) @@ -77,8 +77,14 @@ def test_arguments def test_invalid_expression assert_raises(SyntaxError) do - p = Parser.new("==") + p = new_parser("==") p.expression end end + + private + + def new_parser(str) + Parser.new(str, StringScanner.new("")) + end end diff --git a/test/unit/tag_unit_test.rb b/test/unit/tag_unit_test.rb index 7f9b490c9..9b4bf88b6 100644 --- a/test/unit/tag_unit_test.rb +++ b/test/unit/tag_unit_test.rb @@ -6,18 +6,18 @@ class TagUnitTest < Minitest::Test include Liquid def test_tag - tag = Tag.parse('tag', "", Tokenizer.new(""), ParseContext.new) + tag = Tag.parse('tag', "", new_tokenizer, ParseContext.new) assert_equal('liquid::tag', tag.name) assert_equal('', tag.render(Context.new)) end def test_return_raw_text_of_tag - tag = Tag.parse("long_tag", "param1, param2, param3", Tokenizer.new(""), ParseContext.new) + tag = Tag.parse("long_tag", "param1, param2, param3", new_tokenizer, ParseContext.new) assert_equal("long_tag param1, param2, param3", tag.raw) end def test_tag_name_should_return_name_of_the_tag - tag = Tag.parse("some_tag", "", Tokenizer.new(""), ParseContext.new) + tag = Tag.parse("some_tag", "", new_tokenizer, ParseContext.new) assert_equal('some_tag', tag.tag_name) end @@ -26,7 +26,16 @@ def render(_context); end end def test_tag_render_to_output_buffer_nil_value - custom_tag = CustomTag.parse("some_tag", "", Tokenizer.new(""), ParseContext.new) + custom_tag = CustomTag.parse("some_tag", "", new_tokenizer, ParseContext.new) assert_equal('some string', custom_tag.render_to_output_buffer(Context.new, "some string")) end + + private + + def new_tokenizer + Tokenizer.new( + source: "", + string_scanner: StringScanner.new(""), + ) + end end From cef64e277ecb985db8201746b4d1b41c2bcd40a1 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 18 Nov 2024 19:53:43 -0400 Subject: [PATCH 12/33] early match number expressions with Regex --- lib/liquid/expression.rb | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index ee98d7577..0a569e999 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -69,6 +69,9 @@ class Expression2 # Use an atomic group (?>...) to avoid pathological backtracing from # malicious input as described in https://github.com/Shopify/liquid/issues/1357 RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ + INTEGER_REGEX = /\A(-?\d+)\z/ + FLOAT_REGEX = /\A(-?\d+)\.\d+\z/ + CACHE = LruRedux::Cache.new(10_000) # most themes would have less than 2,000 unique expression class << self @@ -112,8 +115,16 @@ def parse_number(markup, ss) return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE) - is_integer = true - last_dot_pos = nil + # check if the markup is simple integer or float + case markup + when INTEGER_REGEX + return markup.to_i + when FLOAT_REGEX + return markup.to_f + end + + # The markup could be a float with multiple dots + first_dot_pos = nil num_end_pos = nil while (byte = ss.scan_byte) @@ -123,24 +134,23 @@ def parse_number(markup, ss) next if num_end_pos if byte == DOT - if is_integer == false - num_end_pos = ss.pos - 1 + if first_dot_pos.nil? + first_dot_pos = ss.pos else - is_integer = false - last_dot_pos = ss.pos + # we found another dot, so we know that the number ends here + num_end_pos = ss.pos - 1 end end end num_end_pos = markup.length if ss.eos? - return markup.to_i if is_integer - if num_end_pos # number ends with a number "123.123" markup.byteslice(0, num_end_pos).to_f else - markup.byteslice(0, last_dot_pos).to_f + # number ends with a dot "123." + markup.byteslice(0, first_dot_pos).to_f end end end From 71506ad54e43fdf0ed476a47cc67c7635db61824 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 18 Nov 2024 20:12:10 -0400 Subject: [PATCH 13/33] avoid setting up StringScanner for parsing simple numbers --- lib/liquid/expression.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 0a569e999..0d8a44711 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -109,12 +109,6 @@ def inner_parse(markup, ss) end def parse_number(markup, ss) - ss.string = markup - # the first byte must be a digit, a period, or a dash - byte = ss.scan_byte - - return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE) - # check if the markup is simple integer or float case markup when INTEGER_REGEX @@ -123,6 +117,12 @@ def parse_number(markup, ss) return markup.to_f end + ss.string = markup + # the first byte must be a digit, a period, or a dash + byte = ss.scan_byte + + return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE) + # The markup could be a float with multiple dots first_dot_pos = nil num_end_pos = nil From 40191022e847be536a99aa6947c74117470ae814 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Thu, 21 Nov 2024 21:30:37 -0400 Subject: [PATCH 14/33] initialize StringScanner with input for default --- lib/liquid/lexer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 0ab9f3ca3..2f9ea20e5 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -26,7 +26,7 @@ class Lexer1 WHITESPACE_OR_NOTHING = /\s*/ class << self - def tokenize(input, ss = StringScanner.new("")) + def tokenize(input, ss = StringScanner.new(input)) ss.string = input output = [] From 91be3dd75ed37e113ad64d528e41302fefc73ac6 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Thu, 21 Nov 2024 22:01:07 -0400 Subject: [PATCH 15/33] keep the expression parse caching per document parsing --- lib/liquid/expression.rb | 20 ++++++++++++-------- lib/liquid/parse_context.rb | 5 +++-- lib/liquid/range_lookup.rb | 6 +++--- lib/liquid/variable_lookup.rb | 8 +++++--- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 0d8a44711..1b7d0ca23 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -22,7 +22,7 @@ class Expression1 # malicious input as described in https://github.com/Shopify/liquid/issues/1357 RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ - def self.parse(markup, _ss = nil) + def self.parse(markup, _ss = nil, _cache = nil) return nil unless markup markup = markup.strip @@ -72,10 +72,8 @@ class Expression2 INTEGER_REGEX = /\A(-?\d+)\z/ FLOAT_REGEX = /\A(-?\d+)\.\d+\z/ - CACHE = LruRedux::Cache.new(10_000) # most themes would have less than 2,000 unique expression - class << self - def parse(markup, ss = StringScanner.new("")) + def parse(markup, ss = StringScanner.new(""), cache = nil) return unless markup markup = markup.strip # markup can be a frozen string @@ -87,24 +85,30 @@ def parse(markup, ss = StringScanner.new("")) return LITERALS[markup] end - return CACHE[markup] if CACHE.key?(markup) + # Cache only exists during parsing + if cache + return cache[markup] if cache.key?(markup) - CACHE[markup] = inner_parse(markup, ss) + cache[markup] = inner_parse(markup, ss, cache) + else + inner_parse(markup, ss, nil) + end end - def inner_parse(markup, ss) + def inner_parse(markup, ss, cache) if (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX return RangeLookup.parse( Regexp.last_match(1), Regexp.last_match(2), ss, + cache, ) end if (num = parse_number(markup, ss)) num else - VariableLookup.parse(markup, ss) + VariableLookup.parse(markup, ss, cache) end end diff --git a/lib/liquid/parse_context.rb b/lib/liquid/parse_context.rb index f66e86be3..ae2b3ee02 100644 --- a/lib/liquid/parse_context.rb +++ b/lib/liquid/parse_context.rb @@ -3,7 +3,7 @@ module Liquid class ParseContext attr_accessor :locale, :line_number, :trim_whitespace, :depth - attr_reader :partial, :warnings, :error_mode, :environment, :string_scanner + attr_reader :partial, :warnings, :error_mode, :environment, :string_scanner, :expression_cache def initialize(options = Const::EMPTY_HASH) @environment = options.fetch(:environment, Environment.default) @@ -15,6 +15,7 @@ def initialize(options = Const::EMPTY_HASH) # constructing new StringScanner in Lexer, Tokenizer, etc is expensive # This StringScanner will be shared by all of them @string_scanner = StringScanner.new("") + @expression_cache = LruRedux::Cache.new(10_000) self.depth = 0 self.partial = false @@ -42,7 +43,7 @@ def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false) end def parse_expression(markup) - Expression.parse(markup, string_scanner) + Expression.parse(markup, string_scanner, expression_cache) end def partial=(value) diff --git a/lib/liquid/range_lookup.rb b/lib/liquid/range_lookup.rb index 763717240..bc316fe12 100644 --- a/lib/liquid/range_lookup.rb +++ b/lib/liquid/range_lookup.rb @@ -2,9 +2,9 @@ module Liquid class RangeLookup - def self.parse(start_markup, end_markup, string_scanner) - start_obj = Expression.parse(start_markup, string_scanner) - end_obj = Expression.parse(end_markup, string_scanner) + def self.parse(start_markup, end_markup, string_scanner, cache = nil) + start_obj = Expression.parse(start_markup, string_scanner, cache) + end_obj = Expression.parse(end_markup, string_scanner, cache) if start_obj.respond_to?(:evaluate) || end_obj.respond_to?(:evaluate) new(start_obj, end_obj) else diff --git a/lib/liquid/variable_lookup.rb b/lib/liquid/variable_lookup.rb index f544b3bc4..06c24b764 100644 --- a/lib/liquid/variable_lookup.rb +++ b/lib/liquid/variable_lookup.rb @@ -6,11 +6,11 @@ class VariableLookup attr_reader :name, :lookups - def self.parse(markup, string_scanner) - new(markup, string_scanner) + def self.parse(markup, string_scanner, cache = nil) + new(markup, string_scanner, cache) end - def initialize(markup, string_scanner = StringScanner.new("")) + def initialize(markup, string_scanner = StringScanner.new(""), cache = nil) lookups = markup.scan(VariableParser) name = lookups.shift @@ -18,6 +18,7 @@ def initialize(markup, string_scanner = StringScanner.new("")) name = Expression.parse( name[1..-2], string_scanner, + cache, ) end @name = name @@ -31,6 +32,7 @@ def initialize(markup, string_scanner = StringScanner.new("")) lookups[i] = Expression.parse( lookup[1..-2], string_scanner, + cache, ) elsif COMMAND_METHODS.include?(lookup) @command_flags |= 1 << i From ac374a208fe13e5ec8079e4c39ca24019ee35308 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 25 Nov 2024 15:19:42 -0400 Subject: [PATCH 16/33] freeze Expression2 parse results --- lib/liquid/expression.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 1b7d0ca23..5ed4712f8 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -89,9 +89,9 @@ def parse(markup, ss = StringScanner.new(""), cache = nil) if cache return cache[markup] if cache.key?(markup) - cache[markup] = inner_parse(markup, ss, cache) + cache[markup] = inner_parse(markup, ss, cache).freeze else - inner_parse(markup, ss, nil) + inner_parse(markup, ss, nil).freeze end end From e05719fb82bbd43d4d7886453e1a7c9a298f847e Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 25 Nov 2024 15:22:53 -0400 Subject: [PATCH 17/33] use Kernel Integer parsing for simple Integer expression --- lib/liquid/expression.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 5ed4712f8..b3e0b36e1 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -116,7 +116,7 @@ def parse_number(markup, ss) # check if the markup is simple integer or float case markup when INTEGER_REGEX - return markup.to_i + return Integer(markup, 10) when FLOAT_REGEX return markup.to_f end From 253ec81b565ce3fa879b32e5692d5bfb795c8601 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 25 Nov 2024 15:28:45 -0400 Subject: [PATCH 18/33] don't add a breaking change to Parser constructor --- lib/liquid/parse_context.rb | 3 ++- lib/liquid/parser.rb | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/liquid/parse_context.rb b/lib/liquid/parse_context.rb index ae2b3ee02..d8471475c 100644 --- a/lib/liquid/parse_context.rb +++ b/lib/liquid/parse_context.rb @@ -30,7 +30,8 @@ def new_block_body end def new_parser(input) - Parser.new(input, @string_scanner) + @string_scanner.string = input + Parser.new(@string_scanner) end def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false) diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb index a4f781d70..cba3dc7c6 100644 --- a/lib/liquid/parser.rb +++ b/lib/liquid/parser.rb @@ -2,7 +2,8 @@ module Liquid class Parser - def initialize(input, string_scanner) + def initialize(input) + input = input.is_a?(StringScanner) ? input : StringScanner.new(input.to_s) @tokens = Lexer.tokenize(input, string_scanner) @p = 0 # pointer to current location end From 04bd9dbe90f894b9cb08bde8ec2f263b90553e20 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 25 Nov 2024 15:35:39 -0400 Subject: [PATCH 19/33] refactor Lexer to only take StringScanner --- lib/liquid/lexer.rb | 6 ++---- lib/liquid/parse_context.rb | 4 ++-- lib/liquid/parser.rb | 4 ++-- test/unit/lexer_unit_test.rb | 2 +- test/unit/parser_unit_test.rb | 2 +- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 2f9ea20e5..3b23f5ff9 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -26,8 +26,7 @@ class Lexer1 WHITESPACE_OR_NOTHING = /\s*/ class << self - def tokenize(input, ss = StringScanner.new(input)) - ss.string = input + def tokenize(ss) output = [] until ss.eos? @@ -158,8 +157,7 @@ class Lexer2 # rubocop:disable Metrics/BlockNesting class << self - def tokenize(input, ss) - ss.string = input + def tokenize(ss) output = [] until ss.eos? diff --git a/lib/liquid/parse_context.rb b/lib/liquid/parse_context.rb index d8471475c..5eebd0459 100644 --- a/lib/liquid/parse_context.rb +++ b/lib/liquid/parse_context.rb @@ -3,7 +3,7 @@ module Liquid class ParseContext attr_accessor :locale, :line_number, :trim_whitespace, :depth - attr_reader :partial, :warnings, :error_mode, :environment, :string_scanner, :expression_cache + attr_reader :partial, :warnings, :error_mode, :environment def initialize(options = Const::EMPTY_HASH) @environment = options.fetch(:environment, Environment.default) @@ -44,7 +44,7 @@ def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false) end def parse_expression(markup) - Expression.parse(markup, string_scanner, expression_cache) + Expression.parse(markup, @string_scanner, @expression_cache) end def partial=(value) diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb index cba3dc7c6..645dfa3a1 100644 --- a/lib/liquid/parser.rb +++ b/lib/liquid/parser.rb @@ -3,8 +3,8 @@ module Liquid class Parser def initialize(input) - input = input.is_a?(StringScanner) ? input : StringScanner.new(input.to_s) - @tokens = Lexer.tokenize(input, string_scanner) + ss = input.is_a?(StringScanner) ? input : StringScanner.new(input) + @tokens = Lexer.tokenize(ss) @p = 0 # pointer to current location end diff --git a/test/unit/lexer_unit_test.rb b/test/unit/lexer_unit_test.rb index e6f161288..18e1e439a 100644 --- a/test/unit/lexer_unit_test.rb +++ b/test/unit/lexer_unit_test.rb @@ -134,6 +134,6 @@ def test_tokenize_incomplete_expression private def tokenize(input) - Lexer.tokenize(input, StringScanner.new("")) + Lexer.tokenize(StringScanner.new(input)) end end diff --git a/test/unit/parser_unit_test.rb b/test/unit/parser_unit_test.rb index 789c5b663..2c6a3594e 100644 --- a/test/unit/parser_unit_test.rb +++ b/test/unit/parser_unit_test.rb @@ -85,6 +85,6 @@ def test_invalid_expression private def new_parser(str) - Parser.new(str, StringScanner.new("")) + Parser.new(StringScanner.new(str)) end end From 0a17c152895ea3b482614082d18e39f5c26c1975 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 25 Nov 2024 15:39:48 -0400 Subject: [PATCH 20/33] freeze '-' VariableLookup in Expression --- lib/liquid/expression.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index b3e0b36e1..32f357e70 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -58,7 +58,9 @@ class Expression2 'false' => false, 'blank' => '', 'empty' => '', - '-' => VariableLookup.parse("-", nil), + # in lax mode, minus sign can be a VariableLookup + # For simplicity and performace, we treat it like a literal + '-' => VariableLookup.parse("-", nil).freeze, }.freeze DOT = ".".ord From e9f86724f60b981de23f1f3047b7ad0cbdc5208a Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 25 Nov 2024 15:40:41 -0400 Subject: [PATCH 21/33] code styling --- lib/liquid/expression.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 32f357e70..2bef9cb52 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -89,9 +89,7 @@ def parse(markup, ss = StringScanner.new(""), cache = nil) # Cache only exists during parsing if cache - return cache[markup] if cache.key?(markup) - - cache[markup] = inner_parse(markup, ss, cache).freeze + cache.fetch(markup) { inner_parse(markup, ss, cache).freeze } else inner_parse(markup, ss, nil).freeze end From 313d01706acff3b2cbb3a1e7dd89eaac3b4135f6 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 25 Nov 2024 15:56:22 -0400 Subject: [PATCH 22/33] code styling --- lib/liquid.rb | 3 +++ lib/liquid/expression.rb | 3 ++- lib/liquid/lexer.rb | 4 +--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/liquid.rb b/lib/liquid.rb index 367fc3c43..4d0a71a64 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -21,6 +21,8 @@ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +require "strscan" + module Liquid FilterSeparator = /\|/ ArgumentSeparator = ',' @@ -44,6 +46,7 @@ module Liquid VariableParser = /\[(?>[^\[\]]+|\g<0>)*\]|#{VariableSegment}+\??/o RAISE_EXCEPTION_LAMBDA = ->(_e) { raise } + HAS_STRING_SCANNER_SCAN_BYTE = StringScanner.instance_methods.include?(:scan_byte) end require "liquid/version" diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 2bef9cb52..8eb1f2c32 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -160,5 +160,6 @@ def parse_number(markup, ss) end end - Expression = StringScanner.instance_methods.include?(:scan_byte) ? Expression2 : Expression1 + # Remove this once we can depend on strscan >= 3.1.1 + Expression = HAS_STRING_SCANNER_SCAN_BYTE ? Expression2 : Expression1 end diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 3b23f5ff9..ac4a0b8fc 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require "strscan" - module Liquid class Lexer1 SPECIALS = { @@ -229,5 +227,5 @@ def raise_syntax_error(start_pos, ss) end # Remove this once we can depend on strscan >= 3.1.1 - Lexer = StringScanner.instance_methods.include?(:scan_byte) ? Lexer2 : Lexer1 + Lexer = HAS_STRING_SCANNER_SCAN_BYTE ? Lexer2 : Lexer1 end From 2c7d68669046f4d4e3120784e44833b83deafc84 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 25 Nov 2024 16:09:33 -0400 Subject: [PATCH 23/33] adding caching to context variable lookup --- lib/liquid/context.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/liquid/context.rb b/lib/liquid/context.rb index 1ef272d46..83aab805a 100644 --- a/lib/liquid/context.rb +++ b/lib/liquid/context.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require "lru_redux" + module Liquid # Context keeps the variable stack and resolves variables, as well as keywords # @@ -39,6 +41,7 @@ def initialize(environments = {}, outer_scope = {}, registers = {}, rethrow_erro @filters = [] @global_filter = nil @disabled_tags = {} + @expression_cache = LruRedux::ThreadSafeCache.new(1000) # Instead of constructing new StringScanner objects for each Expression parse, # we recycle the same one. @@ -180,7 +183,7 @@ def []=(key, value) # Example: # products == empty #=> products.empty? def [](expression) - evaluate(Expression.parse(expression, @string_scanner)) + evaluate(Expression.parse(expression, @string_scanner, @expression_cache)) end def key?(key) From 42d822bda9cb7b69273cc69aa18ad7a6bdd2f918 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 25 Nov 2024 17:01:49 -0400 Subject: [PATCH 24/33] users can provide optional expression cache --- Gemfile | 3 +-- lib/liquid/expression.rb | 4 ++- lib/liquid/parse_context.rb | 5 +++- test/integration/expression_test.rb | 39 +++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/Gemfile b/Gemfile index e74d19ec7..6802722cb 100644 --- a/Gemfile +++ b/Gemfile @@ -8,6 +8,7 @@ end gemspec gem "base64" +gem "lru_redux" group :benchmark, :test do gem 'benchmark-ips' @@ -24,5 +25,3 @@ group :test do gem 'rubocop-shopify', '~> 2.12.0', require: false gem 'rubocop-performance', require: false end - -gem "lru_redux" diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 8eb1f2c32..5b3f0cd18 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -89,7 +89,9 @@ def parse(markup, ss = StringScanner.new(""), cache = nil) # Cache only exists during parsing if cache - cache.fetch(markup) { inner_parse(markup, ss, cache).freeze } + return cache[markup] if cache.key?(markup) + + cache[markup] = inner_parse(markup, ss, cache).freeze else inner_parse(markup, ss, nil).freeze end diff --git a/lib/liquid/parse_context.rb b/lib/liquid/parse_context.rb index 5eebd0459..030529724 100644 --- a/lib/liquid/parse_context.rb +++ b/lib/liquid/parse_context.rb @@ -15,7 +15,10 @@ def initialize(options = Const::EMPTY_HASH) # constructing new StringScanner in Lexer, Tokenizer, etc is expensive # This StringScanner will be shared by all of them @string_scanner = StringScanner.new("") - @expression_cache = LruRedux::Cache.new(10_000) + + @expression_cache = if options[:expression_cache] != false + options[:expression_cache] || LruRedux::Cache.new(10_000) + end self.depth = 0 self.partial = false diff --git a/test/integration/expression_test.rb b/test/integration/expression_test.rb index 6d29fc792..190a6bec9 100644 --- a/test/integration/expression_test.rb +++ b/test/integration/expression_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'test_helper' +require 'lru_redux' class ExpressionTest < Minitest::Test def test_keyword_literals @@ -54,6 +55,44 @@ def test_quirky_negative_sign_expression_markup ) end + def test_expression_cache + skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + + cache = LruRedux::Cache.new(10) + template = <<~LIQUID + {% assign x = 1 %} + {{ x }} + {% assign x = 2 %} + {{ x }} + {% assign y = 1 %} + {{ y }} + LIQUID + + Liquid::Template.parse(template, expression_cache: cache).render + + assert_equal( + ["1", "2", "x", "y"], + cache.to_a.map { _1[0] }.sort, + ) + end + + def test_disable_expression_cache + skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + + template = <<~LIQUID + {% assign x = 1 %} + {{ x }} + {% assign x = 2 %} + {{ x }} + {% assign y = 1 %} + {{ y }} + LIQUID + + parse_context = Liquid::ParseContext.new(expression_cache: false) + Liquid::Template.parse(template, parse_context).render + assert(parse_context.instance_variable_get(:@expression_cache).nil?) + end + private def assert_expression_result(expect, markup, **assigns) From d48708ae46422a479ec5ff62f6ab1b0792e319e3 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Tue, 26 Nov 2024 14:28:52 -0400 Subject: [PATCH 25/33] skip expression caching with Expression1 --- test/integration/expression_test.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/integration/expression_test.rb b/test/integration/expression_test.rb index 190a6bec9..bb2c02e0b 100644 --- a/test/integration/expression_test.rb +++ b/test/integration/expression_test.rb @@ -57,6 +57,7 @@ def test_quirky_negative_sign_expression_markup def test_expression_cache skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + skip("Expression Caching is only available with Expression2") if Liquid::Expression != Liquid::Expression2 cache = LruRedux::Cache.new(10) template = <<~LIQUID @@ -78,6 +79,7 @@ def test_expression_cache def test_disable_expression_cache skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + skip("Expression Caching is only available with Expression2") if Liquid::Expression != Liquid::Expression2 template = <<~LIQUID {% assign x = 1 %} From 7c9b77e8cf7058805d71e665146871af2585b852 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Tue, 7 Jan 2025 14:51:29 -0400 Subject: [PATCH 26/33] remove StringScanner version check in Lexer and Tokenizer --- lib/liquid/lexer.rb | 60 +------------------------------------- lib/liquid/tokenizer.rb | 64 +++++------------------------------------ liquid.gemspec | 2 +- 3 files changed, 9 insertions(+), 117 deletions(-) diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index ac4a0b8fc..b28e87842 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -1,62 +1,7 @@ # frozen_string_literal: true module Liquid - class Lexer1 - SPECIALS = { - '|' => :pipe, - '.' => :dot, - ':' => :colon, - ',' => :comma, - '[' => :open_square, - ']' => :close_square, - '(' => :open_round, - ')' => :close_round, - '?' => :question, - '-' => :dash, - }.freeze - IDENTIFIER = /[a-zA-Z_][\w-]*\??/ - SINGLE_STRING_LITERAL = /'[^\']*'/ - DOUBLE_STRING_LITERAL = /"[^\"]*"/ - STRING_LITERAL = Regexp.union(SINGLE_STRING_LITERAL, DOUBLE_STRING_LITERAL) - NUMBER_LITERAL = /-?\d+(\.\d+)?/ - DOTDOT = /\.\./ - COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=\s)/ - WHITESPACE_OR_NOTHING = /\s*/ - - class << self - def tokenize(ss) - output = [] - - until ss.eos? - ss.skip(WHITESPACE_OR_NOTHING) - break if ss.eos? - tok = if (t = ss.scan(COMPARISON_OPERATOR)) - [:comparison, t] - elsif (t = ss.scan(STRING_LITERAL)) - [:string, t] - elsif (t = ss.scan(NUMBER_LITERAL)) - [:number, t] - elsif (t = ss.scan(IDENTIFIER)) - [:id, t] - elsif (t = ss.scan(DOTDOT)) - [:dotdot, t] - else - c = ss.getch - if (s = SPECIALS[c]) - [s, c] - else - raise SyntaxError, "Unexpected character #{c}" - end - end - output << tok - end - - output << [:end_of_string] - end - end - end - - class Lexer2 + class Lexer CLOSE_ROUND = [:close_round, ")"].freeze CLOSE_SQUARE = [:close_square, "]"].freeze COLON = [:colon, ":"].freeze @@ -225,7 +170,4 @@ def raise_syntax_error(start_pos, ss) end end end - - # Remove this once we can depend on strscan >= 3.1.1 - Lexer = HAS_STRING_SCANNER_SCAN_BYTE ? Lexer2 : Lexer1 end diff --git a/lib/liquid/tokenizer.rb b/lib/liquid/tokenizer.rb index dcfc8fd4a..008ec070d 100644 --- a/lib/liquid/tokenizer.rb +++ b/lib/liquid/tokenizer.rb @@ -3,55 +3,7 @@ require "strscan" module Liquid - class Tokenizer1 - attr_reader :line_number, :for_liquid_tag - - def initialize( - source:, - string_scanner:, # this is not used - line_numbers: false, - line_number: nil, - for_liquid_tag: false - ) - @source = source.to_s.to_str - @line_number = line_number || (line_numbers ? 1 : nil) - @for_liquid_tag = for_liquid_tag - @offset = 0 - @tokens = tokenize - end - - def shift - token = @tokens[@offset] - return nil unless token - - @offset += 1 - - if @line_number - @line_number += @for_liquid_tag ? 1 : token.count("\n") - end - - token - end - - private - - def tokenize - return [] if @source.empty? - - return @source.split("\n") if @for_liquid_tag - - tokens = @source.split(TemplateParser) - - # removes the rogue empty element at the beginning of the array - if tokens[0]&.empty? - @offset += 1 - end - - tokens - end - end - - class Tokenizer2 + class Tokenizer attr_reader :line_number, :for_liquid_tag TAG_END = /%\}/ @@ -71,14 +23,15 @@ def initialize( ) @line_number = line_number || (line_numbers ? 1 : nil) @for_liquid_tag = for_liquid_tag - @source = source + @source = source.to_s.to_str @offset = 0 @tokens = [] - @ss = string_scanner - @ss.string = @source - - tokenize + if @source + @ss = string_scanner + @ss.string = @source + tokenize + end end def shift @@ -199,7 +152,4 @@ def next_tag_token_with_start(start) @source.byteslice(start, @ss.pos - start) end end - - # Remove this once we can depend on strscan >= 3.1.1 - Tokenizer = StringScanner.instance_methods.include?(:scan_byte) ? Tokenizer2 : Tokenizer1 end diff --git a/liquid.gemspec b/liquid.gemspec index c692fc1d9..a58a36019 100644 --- a/liquid.gemspec +++ b/liquid.gemspec @@ -28,7 +28,7 @@ Gem::Specification.new do |s| s.require_path = "lib" - s.add_dependency("strscan") + s.add_dependency("strscan", ">= 3.1.1") s.add_dependency("bigdecimal") s.add_dependency("lru_redux") From 97cada61f9533902db29ddd19e6a2c615ba5eaf6 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 10 Jan 2025 14:21:57 -0400 Subject: [PATCH 27/33] remove Expression2 strscan workaround --- lib/liquid/expression.rb | 50 +---------- performance/unit/expression_benchmark.rb | 103 ++--------------------- performance/unit/lexer_benchmark.rb | 23 +---- test/integration/expression_test.rb | 2 - 4 files changed, 10 insertions(+), 168 deletions(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index 5b3f0cd18..f2abb2fc1 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -3,52 +3,7 @@ require "lru_redux" module Liquid - class Expression1 - LITERALS = { - nil => nil, - 'nil' => nil, - 'null' => nil, - '' => nil, - 'true' => true, - 'false' => false, - 'blank' => '', - 'empty' => '' - }.freeze - - INTEGERS_REGEX = /\A(-?\d+)\z/ - FLOATS_REGEX = /\A(-?\d[\d\.]+)\z/ - - # Use an atomic group (?>...) to avoid pathological backtracing from - # malicious input as described in https://github.com/Shopify/liquid/issues/1357 - RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ - - def self.parse(markup, _ss = nil, _cache = nil) - return nil unless markup - - markup = markup.strip - if (markup.start_with?('"') && markup.end_with?('"')) || - (markup.start_with?("'") && markup.end_with?("'")) - return markup[1..-2] - end - - case markup - when INTEGERS_REGEX - Regexp.last_match(1).to_i - when RANGES_REGEX - RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2), nil) - when FLOATS_REGEX - Regexp.last_match(1).to_f - else - if LITERALS.key?(markup) - LITERALS[markup] - else - VariableLookup.parse(markup, nil) - end - end - end - end - - class Expression2 + class Expression LITERALS = { nil => nil, 'nil' => nil, @@ -161,7 +116,4 @@ def parse_number(markup, ss) end end end - - # Remove this once we can depend on strscan >= 3.1.1 - Expression = HAS_STRING_SCANNER_SCAN_BYTE ? Expression2 : Expression1 end diff --git a/performance/unit/expression_benchmark.rb b/performance/unit/expression_benchmark.rb index ae4eeaa59..8cd552595 100644 --- a/performance/unit/expression_benchmark.rb +++ b/performance/unit/expression_benchmark.rb @@ -75,109 +75,20 @@ "range" => RANGE_MARKUPS, } -def compare_objects(object_1, object_2) - if object_1.is_a?(Liquid::VariableLookup) && object_2.is_a?(Liquid::VariableLookup) - return false if object_1.name != object_2.name - elsif object_1 != object_2 - return false - end - - true -end - -def compare_range_lookup(expression_1_result, expression_2_result) - return false unless expression_1_result.is_a?(Liquid::RangeLookup) && expression_2_result.is_a?(Liquid::RangeLookup) - - start_obj_1 = expression_1_result.start_obj - start_obj_2 = expression_2_result.start_obj - - return false unless compare_objects(start_obj_1, start_obj_2) - - end_obj_1 = expression_1_result.end_obj - end_obj_2 = expression_2_result.end_obj - - return false unless compare_objects(end_obj_1, end_obj_2) - - true -end - -MARKUPS.values.flatten.each do |markup| - expression_1_result = Liquid::Expression1.parse(markup) - expression_2_result = Liquid::Expression2.parse(markup) - - next if expression_1_result == expression_2_result - - if expression_1_result.is_a?(Liquid::RangeLookup) && expression_2_result.is_a?(Liquid::RangeLookup) - next if compare_range_lookup(expression_1_result, expression_2_result) - end - - warn "Expression1 and Expression2 results are different for markup: #{markup}" - warn "expected: #{expression_1_result}" - warn "got: #{expression_2_result}" - abort -end - -warmed_up = false - -MARKUPS.each do |type, markups| - Benchmark.ips do |x| - if warmed_up - x.config(time: 10, warmup: 5) - warmed_up = true - else - x.config(time: 10) - end - - x.report("Liquid::Expression1#parse: #{type}") do - if Liquid::Expression != Liquid::Expression1 - Liquid.send(:remove_const, :Expression) - Liquid.const_set(:Expression, Liquid::Expression1) - end - - markups.each do |markup| - Liquid::Expression1.parse(markup) - end - end - - x.report("Liquid::Expression2#parse: #{type}") do - if Liquid::Expression != Liquid::Expression2 - Liquid.send(:remove_const, :Expression) - Liquid.const_set(:Expression, Liquid::Expression2) - end +Benchmark.ips do |x| + x.config(time: 5, warmup: 5) + MARKUPS.each do |type, markups| + x.report("Liquid::Expression#parse: #{type}") do markups.each do |markup| - Liquid::Expression2.parse(markup) + Liquid::Expression.parse(markup) end end - - x.compare! - end -end - -Benchmark.ips do |x| - x.config(time: 10) - - x.report("Liquid::Expression1#parse: all") do - if Liquid::Expression != Liquid::Expression1 - Liquid.send(:remove_const, :Expression) - Liquid.const_set(:Expression, Liquid::Expression1) - end - - MARKUPS.values.flatten.each do |markup| - Liquid::Expression1.parse(markup) - end end - x.report("Liquid::Expression2#parse: all") do - if Liquid::Expression != Liquid::Expression2 - Liquid.send(:remove_const, :Expression) - Liquid.const_set(:Expression, Liquid::Expression2) - end - + x.report("Liquid::Expression#parse: all") do MARKUPS.values.flatten.each do |markup| - Liquid::Expression2.parse(markup) + Liquid::Expression.parse(markup) end end - - x.compare! end diff --git a/performance/unit/lexer_benchmark.rb b/performance/unit/lexer_benchmark.rb index 2faa1f686..fda6ce1f6 100644 --- a/performance/unit/lexer_benchmark.rb +++ b/performance/unit/lexer_benchmark.rb @@ -29,31 +29,12 @@ "foo | default: -1", ] -EXPRESSIONS.each do |expr| - lexer_1_result = Liquid::Lexer1.new(expr).tokenize - lexer_2_result = Liquid::Lexer2.new(expr).tokenize - - next if lexer_1_result == lexer_2_result - - warn "Lexer1 and Lexer2 results are different for expression: #{expr}" - warn "expected: #{lexer_1_result}" - warn "got: #{lexer_2_result}" - abort -end - Benchmark.ips do |x| x.config(time: 10, warmup: 5) - x.report("Liquid::Lexer1#tokenize") do - EXPRESSIONS.each do |expr| - l = Liquid::Lexer1.new(expr) - l.tokenize - end - end - - x.report("Liquid::Lexer2#tokenize") do + x.report("Liquid::Lexer#tokenize") do EXPRESSIONS.each do |expr| - l = Liquid::Lexer2.new(expr) + l = Liquid::Lexer.new(expr) l.tokenize end end diff --git a/test/integration/expression_test.rb b/test/integration/expression_test.rb index bb2c02e0b..190a6bec9 100644 --- a/test/integration/expression_test.rb +++ b/test/integration/expression_test.rb @@ -57,7 +57,6 @@ def test_quirky_negative_sign_expression_markup def test_expression_cache skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled - skip("Expression Caching is only available with Expression2") if Liquid::Expression != Liquid::Expression2 cache = LruRedux::Cache.new(10) template = <<~LIQUID @@ -79,7 +78,6 @@ def test_expression_cache def test_disable_expression_cache skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled - skip("Expression Caching is only available with Expression2") if Liquid::Expression != Liquid::Expression2 template = <<~LIQUID {% assign x = 1 %} From 0b8e30b81989601641d245094528c7543c1ef0a1 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 10 Jan 2025 14:37:54 -0400 Subject: [PATCH 28/33] update unit test to be compatible with new source_location format --- test/unit/environment_filter_test.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/unit/environment_filter_test.rb b/test/unit/environment_filter_test.rb index 942f6a3c7..e710573b1 100644 --- a/test/unit/environment_filter_test.rb +++ b/test/unit/environment_filter_test.rb @@ -36,14 +36,14 @@ def test_strainer assert_equal("public", strainer.invoke("public_filter")) end - def test_stainer_raises_argument_error + def test_strainer_raises_argument_error strainer = @environment.create_strainer(@context) assert_raises(Liquid::ArgumentError) do strainer.invoke("public_filter", 1) end end - def test_stainer_argument_error_contains_backtrace + def test_strainer_argument_error_contains_backtrace strainer = @environment.create_strainer(@context) exception = assert_raises(Liquid::ArgumentError) do @@ -54,8 +54,9 @@ def test_stainer_argument_error_contains_backtrace /\ALiquid error: wrong number of arguments \((1 for 0|given 1, expected 0)\)\z/, exception.message, ) + source = AccessScopeFilters.instance_method(:public_filter).source_location - assert_equal(source.map(&:to_s), exception.backtrace[0].split(':')[0..1]) + assert_equal(source[0..1].map(&:to_s), exception.backtrace[0].split(':')[0..1]) end def test_strainer_only_invokes_public_filter_methods From 2a5ecf068feca41590804dd5ab41a4747fe0327d Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 10 Jan 2025 14:38:50 -0400 Subject: [PATCH 29/33] remove lru_redux from Gemfile --- Gemfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Gemfile b/Gemfile index 6802722cb..013b1700a 100644 --- a/Gemfile +++ b/Gemfile @@ -8,7 +8,6 @@ end gemspec gem "base64" -gem "lru_redux" group :benchmark, :test do gem 'benchmark-ips' From bd05dfbd1c9a53a0ec3b43cdc97cd41ef21767b5 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 10 Jan 2025 14:41:35 -0400 Subject: [PATCH 30/33] use lru_redux getset for simplicity --- lib/liquid/expression.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index f2abb2fc1..f10c25db0 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -44,9 +44,7 @@ def parse(markup, ss = StringScanner.new(""), cache = nil) # Cache only exists during parsing if cache - return cache[markup] if cache.key?(markup) - - cache[markup] = inner_parse(markup, ss, cache).freeze + cache.getset(markup) { inner_parse(markup, ss, cache).freeze } else inner_parse(markup, ss, nil).freeze end From 2e236b0a0ea9408a8d29643151a2c9f87ded18d4 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 10 Jan 2025 14:58:23 -0400 Subject: [PATCH 31/33] fix expression cache to be compatible with Ruby Hash and LruCache --- lib/liquid/expression.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index f10c25db0..f2abb2fc1 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -44,7 +44,9 @@ def parse(markup, ss = StringScanner.new(""), cache = nil) # Cache only exists during parsing if cache - cache.getset(markup) { inner_parse(markup, ss, cache).freeze } + return cache[markup] if cache.key?(markup) + + cache[markup] = inner_parse(markup, ss, cache).freeze else inner_parse(markup, ss, nil).freeze end From a07ae905230172a40156cca7c7b2a6becdccb89d Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 10 Jan 2025 15:07:57 -0400 Subject: [PATCH 32/33] use Ruby Hash as default expression cache --- Gemfile | 1 + lib/liquid/parse_context.rb | 8 +++-- liquid.gemspec | 1 - test/integration/expression_test.rb | 45 +++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/Gemfile b/Gemfile index 013b1700a..fd9d00b02 100644 --- a/Gemfile +++ b/Gemfile @@ -13,6 +13,7 @@ group :benchmark, :test do gem 'benchmark-ips' gem 'memory_profiler' gem 'terminal-table' + gem "lru_redux" install_if -> { RUBY_PLATFORM !~ /mingw|mswin|java/ && RUBY_ENGINE != 'truffleruby' } do gem 'stackprof' diff --git a/lib/liquid/parse_context.rb b/lib/liquid/parse_context.rb index 030529724..60cdf9e41 100644 --- a/lib/liquid/parse_context.rb +++ b/lib/liquid/parse_context.rb @@ -16,8 +16,12 @@ def initialize(options = Const::EMPTY_HASH) # This StringScanner will be shared by all of them @string_scanner = StringScanner.new("") - @expression_cache = if options[:expression_cache] != false - options[:expression_cache] || LruRedux::Cache.new(10_000) + @expression_cache = if options[:expression_cache].nil? + {} + elsif options[:expression_cache].respond_to?(:[]) && options[:expression_cache].respond_to?(:[]=) + options[:expression_cache] + elsif options[:expression_cache] + {} end self.depth = 0 diff --git a/liquid.gemspec b/liquid.gemspec index a58a36019..1b9a91004 100644 --- a/liquid.gemspec +++ b/liquid.gemspec @@ -30,7 +30,6 @@ Gem::Specification.new do |s| s.add_dependency("strscan", ">= 3.1.1") s.add_dependency("bigdecimal") - s.add_dependency("lru_redux") s.add_development_dependency('rake', '~> 13.0') s.add_development_dependency('minitest') diff --git a/test/integration/expression_test.rb b/test/integration/expression_test.rb index 190a6bec9..0eef5dc9d 100644 --- a/test/integration/expression_test.rb +++ b/test/integration/expression_test.rb @@ -58,6 +58,51 @@ def test_quirky_negative_sign_expression_markup def test_expression_cache skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + cache = {} + template = <<~LIQUID + {% assign x = 1 %} + {{ x }} + {% assign x = 2 %} + {{ x }} + {% assign y = 1 %} + {{ y }} + LIQUID + + Liquid::Template.parse(template, expression_cache: cache).render + + assert_equal( + ["1", "2", "x", "y"], + cache.to_a.map { _1[0] }.sort, + ) + end + + def test_expression_cache_with_true_boolean + skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + + template = <<~LIQUID + {% assign x = 1 %} + {{ x }} + {% assign x = 2 %} + {{ x }} + {% assign y = 1 %} + {{ y }} + LIQUID + + parse_context = ParseContext.new(expression_cache: true) + + Liquid::Template.parse(template, parse_context).render + + cache = parse_context.instance_variable_get(:@expression_cache) + + assert_equal( + ["1", "2", "x", "y"], + cache.to_a.map { _1[0] }.sort, + ) + end + + def test_expression_cache_with_lru_redux + skip("Liquid-C does not support Expression caching") if defined?(Liquid::C) && Liquid::C.enabled + cache = LruRedux::Cache.new(10) template = <<~LIQUID {% assign x = 1 %} From 10114b333eac9e46454e335dea9a6ba906552630 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 10 Jan 2025 15:28:02 -0400 Subject: [PATCH 33/33] minor version bump --- lib/liquid/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/liquid/version.rb b/lib/liquid/version.rb index 52831e7b2..7c3004e50 100644 --- a/lib/liquid/version.rb +++ b/lib/liquid/version.rb @@ -2,5 +2,5 @@ # frozen_string_literal: true module Liquid - VERSION = "5.6.0" + VERSION = "5.6.1" end