Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: clean invalid UTF8 from trace field values when generating propagation header #232

Merged
merged 7 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,7 @@ Metrics/ParameterLists:
Max: 6

Style/AccessModifierDeclarations:
Exclude:
- lib/honeycomb/propagation/aws.rb
- lib/honeycomb/propagation/w3c.rb
- lib/honeycomb/propagation/honeycomb.rb
- lib/honeycomb/propagation/default.rb
Enabled: false

Style/FrozenStringLiteralComment:
EnforcedStyle: always
Expand Down
2 changes: 0 additions & 2 deletions lib/honeycomb/propagation/default_modern.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,8 @@ def parse_rack_env(env)
[nil, nil, nil, nil]
end
end
# rubocop:disable Style/AccessModifierDeclarations
module_function :parse_rack_env
public :parse_rack_env
# rubocop:enable Style/AccessModifierDeclarations
end
end
end
36 changes: 25 additions & 11 deletions lib/honeycomb/propagation/honeycomb.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
require "base64"
require "json"
require "uri"
require "libhoney/cleaner"

module Honeycomb
# Parsing and propagation for honeycomb trace headers
Expand Down Expand Up @@ -41,7 +42,7 @@ def parse_v1(payload)
when "parent_id"
parent_span_id = value
when "context"
Base64.decode64(value).tap do |json|
Base64.urlsafe_decode64(value).tap do |json|
trace_fields = JSON.parse json
rescue JSON::ParserError
trace_fields = {}
Expand All @@ -58,14 +59,17 @@ def parse_v1(payload)

# Serialize trace headers
module MarshalTraceContext
# for cleaning data in trace fields before serializing to prop header value
include Libhoney::Cleaner
# promote cleaner instance methods to module methods so that self.to_trace_header can use them
module_function :clean_data, :clean_string
JamieDanielson marked this conversation as resolved.
Show resolved Hide resolved

def to_trace_header
context = Base64.urlsafe_encode64(JSON.generate(trace.fields)).strip
encoded_dataset = URI.encode_www_form_component(builder.dataset)
data_to_propogate = [
"dataset=#{encoded_dataset}",
"dataset=#{encode_dataset(builder.dataset)}",
"trace_id=#{trace.id}",
"parent_id=#{id}",
"context=#{context}",
"context=#{encode_trace_fields(trace.fields)}",
]
"1;#{data_to_propogate.join(',')}"
end
Expand All @@ -77,18 +81,28 @@ def self.parse_faraday_env(_env, propagation_context)
end

def self.to_trace_header(propagation_context)
fields = propagation_context.trace_fields
context = Base64.urlsafe_encode64(JSON.generate(fields)).strip
dataset = propagation_context.dataset
encoded_dataset = URI.encode_www_form_component(dataset)
data_to_propogate = [
"dataset=#{encoded_dataset}",
"dataset=#{encode_dataset(propagation_context.dataset)}",
"trace_id=#{propagation_context.trace_id}",
"parent_id=#{propagation_context.parent_id}",
"context=#{context}",
"context=#{encode_trace_fields(propagation_context.trace_fields)}",
]
"1;#{data_to_propogate.join(',')}"
end

def encode_trace_fields(fields)
Base64.urlsafe_encode64(
JSON.generate(
clean_data(fields),
),
).strip
end
module_function :encode_trace_fields

def encode_dataset(dataset)
URI.encode_www_form_component(dataset)
end
module_function :encode_dataset
end
end
end
24 changes: 18 additions & 6 deletions lib/honeycomb/propagation/honeycomb_modern.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
require "base64"
require "json"
require "uri"
require "libhoney/cleaner"

module Honeycomb
# Parsing and propagation for honeycomb trace headers
Expand Down Expand Up @@ -39,7 +40,7 @@ def parse_v1(payload)
when "parent_id"
parent_span_id = value
when "context"
Base64.decode64(value).tap do |json|
Base64.urlsafe_decode64(value).tap do |json|
trace_fields = JSON.parse json
rescue JSON::ParserError
trace_fields = {}
Expand All @@ -56,12 +57,16 @@ def parse_v1(payload)

# Serialize trace headers
module MarshalTraceContext
# for cleaning data in trace fields before serializing to prop header value
include Libhoney::Cleaner
# promote cleaner instance methods to module methods so that self.to_trace_header can use them
module_function :clean_data, :clean_string

def to_trace_header
context = Base64.urlsafe_encode64(JSON.generate(trace.fields)).strip
data_to_propogate = [
"trace_id=#{trace.id}",
"parent_id=#{id}",
"context=#{context}",
"context=#{encode_trace_fields(trace.fields)}",
]
"1;#{data_to_propogate.join(',')}"
end
Expand All @@ -73,15 +78,22 @@ def self.parse_faraday_env(_env, propagation_context)
end

def self.to_trace_header(propagation_context)
fields = propagation_context.trace_fields
context = Base64.urlsafe_encode64(JSON.generate(fields)).strip
data_to_propogate = [
"trace_id=#{propagation_context.trace_id}",
"parent_id=#{propagation_context.parent_id}",
"context=#{context}",
"context=#{encode_trace_fields(propagation_context.trace_fields)}",
]
"1;#{data_to_propogate.join(',')}"
end

def encode_trace_fields(fields)
Base64.urlsafe_encode64(
JSON.generate(
clean_data(fields),
),
).strip
end
module_function :encode_trace_fields
end
end
end
40 changes: 33 additions & 7 deletions spec/honeycomb/propagation/honeycomb_modern_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,21 @@
nil,
]
end

it "handles previously-encoded invalid utf8" do
JamieDanielson marked this conversation as resolved.
Show resolved Hide resolved
encoded_fields = "eyJ0ZXN0IjoiaG9uZXljb21iIiwiaW52YWxpZF91dGY4Ijoi77-9In0="
expected_fields = { "test" => "honeycomb", "invalid_utf8" => "�" }

serialized_trace =
"1;trace_id=trace_id,parent_id=parent_id,context=#{encoded_fields}"

expect(propagation.parse(serialized_trace)).to eq [
"trace_id",
"parent_id",
expected_fields,
nil,
]
end
end

RSpec.describe Honeycomb::HoneycombModernPropagation::UnmarshalTraceContext do
Expand All @@ -92,26 +107,33 @@
end

RSpec.describe Honeycomb::HoneycombModernPropagation::MarshalTraceContext do
let(:fields) { { "test" => "honeycomb", "invalid_utf8" => "\xAE" } }
let(:expected_encoded_fields) { "eyJ0ZXN0IjoiaG9uZXljb21iIiwiaW52YWxpZF91dGY4Ijoi77-9In0=" }

describe "module usage" do
let(:builder) { instance_double("Builder", dataset: "rails") }
let(:trace) { instance_double("Trace", id: 2, fields: {}) }
let(:trace) do
instance_double("Trace", id: 2, fields: fields)
end
let(:span) do
instance_double("Span", id: 1, trace: trace, builder: builder)
.extend(subject)
end

it "can serialize a basic span and not include the dataset" do
it "can serialize a span with fields and handle invalid UTF8 and not include the dataset" do
expect(span.to_trace_header)
.to eq("1;trace_id=2,parent_id=1,context=e30=")
.to eq("1;trace_id=2,parent_id=1,context=#{expected_encoded_fields}")
end
end

describe "class method usage" do
let(:context) { Honeycomb::Propagation::Context.new(2, 1, {}, "rails") }
let(:context) do
Honeycomb::Propagation::Context.new(2, 1, fields, "rails")
end

it "can serialize a basic span and not include the dataset" do
it "can serialize a span with fields and handle invalid UTF8 and not include the dataset" do
expect(subject.to_trace_header(context))
.to eq("1;trace_id=2,parent_id=1,context=e30=")
.to eq("1;trace_id=2,parent_id=1,context=#{expected_encoded_fields}")
end
end
end
Expand All @@ -123,6 +145,7 @@
let(:fields) do
{
"test" => "honeycomb",
"invalid_utf8" => "\xAE",
}
end
let(:builder) { instance_double("Builder", dataset: dataset) }
Expand Down Expand Up @@ -151,6 +174,9 @@
end

it "produces the correct fields" do
expect(output[2]).to eq fields
expect(output[2]).to eq(
"test" => "honeycomb",
"invalid_utf8" => "�",
)
end
end
40 changes: 33 additions & 7 deletions spec/honeycomb/propagation/honeycomb_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,21 @@
nil,
]
end

it "handles previously-encoded invalid utf8" do
encoded_fields = "eyJ0ZXN0IjoiaG9uZXljb21iIiwiaW52YWxpZF91dGY4Ijoi77-9In0="
expected_fields = { "test" => "honeycomb", "invalid_utf8" => "�" }

serialized_trace =
"1;trace_id=trace_id,parent_id=parent_id,context=#{encoded_fields}"

expect(propagation.parse(serialized_trace)).to eq [
"trace_id",
"parent_id",
expected_fields,
nil,
]
end
end

RSpec.describe Honeycomb::HoneycombPropagation::UnmarshalTraceContext do
Expand All @@ -81,26 +96,33 @@
end

RSpec.describe Honeycomb::HoneycombPropagation::MarshalTraceContext do
let(:fields) { { "test" => "honeycomb", "invalid_utf8" => "\xAE" } }
let(:expected_encoded_fields) { "eyJ0ZXN0IjoiaG9uZXljb21iIiwiaW52YWxpZF91dGY4Ijoi77-9In0=" }

describe "module usage" do
let(:builder) { instance_double("Builder", dataset: "rails") }
let(:trace) { instance_double("Trace", id: 2, fields: {}) }
let(:trace) do
instance_double("Trace", id: 2, fields: fields)
end
let(:span) do
instance_double("Span", id: 1, trace: trace, builder: builder)
.extend(subject)
end

it "can serialize a basic span" do
it "can serialize a span with fields and handle invalid UTF8" do
expect(span.to_trace_header)
.to eq("1;dataset=rails,trace_id=2,parent_id=1,context=e30=")
.to eq("1;dataset=rails,trace_id=2,parent_id=1,context=#{expected_encoded_fields}")
end
end

describe "class method usage" do
let(:context) { Honeycomb::Propagation::Context.new(2, 1, {}, "rails") }
let(:context) do
Honeycomb::Propagation::Context.new(2, 1, fields, "rails")
end

it "can serialize a basic span" do
it "can serialize a span with fields and handle invalid UTF8" do
expect(subject.to_trace_header(context))
.to eq("1;dataset=rails,trace_id=2,parent_id=1,context=e30=")
.to eq("1;dataset=rails,trace_id=2,parent_id=1,context=#{expected_encoded_fields}")
end
end
end
Expand All @@ -112,6 +134,7 @@
let(:fields) do
{
"test" => "honeycomb",
"invalid_utf8" => "\xAE",
}
end
let(:builder) { instance_double("Builder", dataset: dataset) }
Expand Down Expand Up @@ -140,6 +163,9 @@
end

it "produces the correct fields" do
expect(output[2]).to eq fields
expect(output[2]).to eq(
"test" => "honeycomb",
"invalid_utf8" => "�",
)
end
end