Skip to content

Commit

Permalink
faster Expression parser
Browse files Browse the repository at this point in the history
  • Loading branch information
ggmichaelgo committed Nov 5, 2024
1 parent 42b6763 commit e912bb0
Show file tree
Hide file tree
Showing 5 changed files with 294 additions and 7 deletions.
24 changes: 20 additions & 4 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,26 @@ namespace :benchmark do
end

desc "Run unit benchmarks"
task :unit do
Dir["./performance/unit/*_benchmark.rb"].each do |file|
puts "🧪 Running #{file}"
ruby file
namespace :unit do
task :all do
Dir["./performance/unit/*_benchmark.rb"].each do |file|
puts "🧪 Running #{file}"
ruby file
end
end

task :lexer do
Dir["./performance/unit/lexer_benchmark.rb"].each do |file|
puts "🧪 Running #{file}"
ruby file
end
end

task :expression do
Dir["./performance/unit/expression_benchmark.rb"].each do |file|
puts "🧪 Running #{file}"
ruby file
end
end
end
end
Expand Down
89 changes: 88 additions & 1 deletion lib/liquid/expression.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true

module Liquid
class Expression
class Expression1
LITERALS = {
nil => nil,
'nil' => nil,
Expand Down Expand Up @@ -45,4 +45,91 @@ def self.parse(markup)
end
end
end

class Expression2
LITERALS = {
nil => nil,
'nil' => nil,
'null' => nil,
'' => nil,
'true' => true,
'false' => false,
'blank' => '',
'empty' => ''
}.freeze

DOT = ".".ord
ZERO = "0".ord
NINE = "9".ord
DASH = "-".ord

# Use an atomic group (?>...) to avoid pathological backtracing from
# malicious input as described in https://github.com/Shopify/liquid/issues/1357
RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/

def self.parse(markup)
return nil unless markup

markup = markup.strip # markup can be a frozen string

if (markup.start_with?('"') && markup.end_with?('"')) ||
(markup.start_with?("'") && markup.end_with?("'"))
return markup.byteslice(1, markup.length - 2)
elsif (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX
return RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2))
end

return LITERALS[markup] if LITERALS.key?(markup)

if (num = parse_number(markup))
num
else
VariableLookup.parse(markup)
end
end

def self.parse_number(markup)
ss = StringScanner.new(markup)
is_integer = true
last_dot_pos = nil
num_end_pos = nil

# the first byte must be a digit, a period, or a dash
byte = ss.scan_byte

return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE)

while (byte = ss.scan_byte)
return false if byte != DOT && (byte < ZERO || byte > NINE)

# we found our number and now we are just scanning the rest of the string
next if num_end_pos

if byte == DOT
if is_integer == false
num_end_pos = ss.pos - 1
else
is_integer = false
last_dot_pos = ss.pos
end
end
end

num_end_pos = markup.length if ss.eos?

return markup.to_i if is_integer

if num_end_pos
# number ends with a number "123.123"
markup.byteslice(0, num_end_pos).to_f
elsif last_dot_pos
markup.byteslice(0, last_dot_pos).to_f
else
# we should never reach this point
false
end
end
end

Expression = StringScanner.instance_methods.include?(:scan_byte) ? Expression2 : Expression1
end
4 changes: 2 additions & 2 deletions lib/liquid/variable_lookup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def initialize(markup)

name = lookups.shift
if name&.start_with?('[') && name&.end_with?(']')
name = Expression.parse(name[1..-2])
name = Expression.parse(name.byteslice(1, name.length - 2))
end
@name = name

Expand All @@ -25,7 +25,7 @@ def initialize(markup)
@lookups.each_index do |i|
lookup = lookups[i]
if lookup&.start_with?('[') && lookup&.end_with?(']')
lookups[i] = Expression.parse(lookup[1..-2])
lookups[i] = Expression.parse(lookup.byteslice(1, lookup.length - 2))
elsif COMMAND_METHODS.include?(lookup)
@command_flags |= 1 << i
end
Expand Down
183 changes: 183 additions & 0 deletions performance/unit/expression_benchmark.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# frozen_string_literal: true

require "benchmark/ips"

# benchmark liquid lexing

require 'liquid'

RubyVM::YJIT.enable

STRING_MARKUPS = [
"\"foo\"",
"\"fooooooooooo\"",
"\"foooooooooooooooooooooooooooooo\"",
"'foo'",
"'fooooooooooo'",
"'foooooooooooooooooooooooooooooo'",
]

VARIABLE_MARKUPS = [
"article",
"article.title",
"article.title.size",
"very_long_variable_name_2024_11_05",
"very_long_variable_name_2024_11_05.size",
]

NUMBER_MARKUPS = [
"0",
"35",
"1241891024912849",
"3.5",
"3.51214128409128",
"12381902839.123819283910283",
"123.456.789",
"-123",
"-12.33",
"-405.231",
"-0",
"0",
"0.0",
"0.0000000000000000000000",
"0.00000000001",
]

RANGE_MARKUPS = [
"(1..30)",
"(1...30)",
"(1..30..5)",
"(1.0...30.0)",
"(1.........30)",
"(1..foo)",
"(foo..30)",
"(foo..bar)",
"(foo...bar...100)",
"(foo...bar...100.0)",
]

LITERAL_MARKUPS = [
nil,
'nil',
'null',
'',
'true',
'false',
'blank',
'empty',
]

MARKUPS = {
"string" => STRING_MARKUPS,
"literal" => LITERAL_MARKUPS,
"variable" => VARIABLE_MARKUPS,
"number" => NUMBER_MARKUPS,
"range" => RANGE_MARKUPS,
}

def compare_objects(object_1, object_2)
if object_1.is_a?(Liquid::VariableLookup) && object_2.is_a?(Liquid::VariableLookup)
return false if object_1.name != object_2.name
elsif object_1 != object_2
return false
end

true
end

def compare_range_lookup(expression_1_result, expression_2_result)
return false unless expression_1_result.is_a?(Liquid::RangeLookup) && expression_2_result.is_a?(Liquid::RangeLookup)

start_obj_1 = expression_1_result.start_obj
start_obj_2 = expression_2_result.start_obj

return false unless compare_objects(start_obj_1, start_obj_2)

end_obj_1 = expression_1_result.end_obj
end_obj_2 = expression_2_result.end_obj

return false unless compare_objects(end_obj_1, end_obj_2)

true
end

MARKUPS.values.flatten.each do |markup|
expression_1_result = Liquid::Expression1.parse(markup)
expression_2_result = Liquid::Expression2.parse(markup)

next if expression_1_result == expression_2_result

if expression_1_result.is_a?(Liquid::RangeLookup) && expression_2_result.is_a?(Liquid::RangeLookup)
next if compare_range_lookup(expression_1_result, expression_2_result)
end

warn "Expression1 and Expression2 results are different for markup: #{markup}"
warn "expected: #{expression_1_result}"
warn "got: #{expression_2_result}"
abort
end

warmed_up = false

MARKUPS.each do |type, markups|
Benchmark.ips do |x|
if warmed_up
x.config(time: 10, warmup: 5)
warmed_up = true
else
x.config(time: 10)
end

x.report("Liquid::Expression1#parse: #{type}") do
if Liquid::Expression != Liquid::Expression1
Liquid.send(:remove_const, :Expression)
Liquid.const_set(:Expression, Liquid::Expression1)
end

markups.each do |markup|
Liquid::Expression1.parse(markup)
end
end

x.report("Liquid::Expression2#parse: #{type}") do
if Liquid::Expression != Liquid::Expression2
Liquid.send(:remove_const, :Expression)
Liquid.const_set(:Expression, Liquid::Expression2)
end

markups.each do |markup|
Liquid::Expression2.parse(markup)
end
end

x.compare!
end
end

Benchmark.ips do |x|
x.config(time: 10)

x.report("Liquid::Expression1#parse: all") do
if Liquid::Expression != Liquid::Expression1
Liquid.send(:remove_const, :Expression)
Liquid.const_set(:Expression, Liquid::Expression1)
end

MARKUPS.values.flatten.each do |markup|
Liquid::Expression1.parse(markup)
end
end

x.report("Liquid::Expression2#parse: all") do
if Liquid::Expression != Liquid::Expression2
Liquid.send(:remove_const, :Expression)
Liquid.const_set(:Expression, Liquid::Expression2)
end

MARKUPS.values.flatten.each do |markup|
Liquid::Expression2.parse(markup)
end
end

x.compare!
end
1 change: 1 addition & 0 deletions test/integration/expression_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def test_int
end

def test_float
assert_template_result("-17.42", "{{ -17.42 }}")
assert_template_result("2.5", "{{ 2.5 }}")
assert_expression_result(1.5, "1.5")
end
Expand Down

0 comments on commit e912bb0

Please sign in to comment.