From d48610164fd291532f00f6929c48a7f0417e29e2 Mon Sep 17 00:00:00 2001 From: colmsnowplow Date: Wed, 26 Jun 2024 18:16:19 +0100 Subject: [PATCH] Remove needless data copying Original implementation did something silly and inefficient. --- pkg/transform/transform.go | 5 +- pkg/transform/transform_test.go | 120 +++++++++++++++++++++++++++----- 2 files changed, 104 insertions(+), 21 deletions(-) diff --git a/pkg/transform/transform.go b/pkg/transform/transform.go index fbb43dff..ec1f7acc 100644 --- a/pkg/transform/transform.go +++ b/pkg/transform/transform.go @@ -37,9 +37,8 @@ func NewTransformation(tranformFunctions ...TransformationFunction) Transformati return models.NewTransformationResult(messages, filteredList, failureList) } - for _, message := range messages { - msg := *message // dereference to avoid amending input - success := &msg // success must be both input and output to a TransformationFunction, so we make this pointer. + for _, success := range messages { + // success is the input and output, so we name it as such here var failure *models.Message var filtered *models.Message var intermediate interface{} diff --git a/pkg/transform/transform_test.go b/pkg/transform/transform_test.go index 5355252d..776fed4a 100644 --- a/pkg/transform/transform_test.go +++ b/pkg/transform/transform_test.go @@ -13,7 +13,6 @@ package transform import ( "testing" - "time" "github.com/stretchr/testify/assert" @@ -24,6 +23,25 @@ import ( func TestNewTransformation_Passthrough(t *testing.T) { assert := assert.New(t) + Msgs := []*models.Message{ + { + Data: SnowplowTsv1, + PartitionKey: "some-key", + }, + { + Data: SnowplowTsv2, + PartitionKey: "some-key1", + }, + { + Data: SnowplowTsv3, + PartitionKey: "some-key2", + }, + { + Data: nonSnowplowString, + PartitionKey: "some-key4", + }, + } + // expected is equal to messages, specifying separately to avoid false positive if we accidentally mutate input. var expected = []*models.Message{ { @@ -46,7 +64,7 @@ func TestNewTransformation_Passthrough(t *testing.T) { expectedNoTransformRes := models.NewTransformationResult(expected, make([]*models.Message, 0, 0), make([]*models.Message, 0, 0)) noTransform := NewTransformation(make([]TransformationFunction, 0, 0)...) - noTransformResult := noTransform(Messages) + noTransformResult := noTransform(Msgs) assert.Equal(expectedNoTransformRes, noTransformResult) } @@ -54,6 +72,25 @@ func TestNewTransformation_Passthrough(t *testing.T) { func TestNewTransformation_EnrichedToJson(t *testing.T) { assert := assert.New(t) + Msgs := []*models.Message{ + { + Data: SnowplowTsv1, + PartitionKey: "some-key", + }, + { + Data: SnowplowTsv2, + PartitionKey: "some-key1", + }, + { + Data: SnowplowTsv3, + PartitionKey: "some-key2", + }, + { + Data: nonSnowplowString, + PartitionKey: "some-key4", + }, + } + var expectedGood = []*models.Message{ { Data: snowplowJSON1, @@ -69,18 +106,13 @@ func TestNewTransformation_EnrichedToJson(t *testing.T) { }, } - tranformEnrichJSON := NewTransformation(SpEnrichedToJSON) - enrichJSONRes := tranformEnrichJSON(Messages) + transformEnrichJSON := NewTransformation(SpEnrichedToJSON) + enrichJSONRes := transformEnrichJSON(Msgs) for index, value := range enrichJSONRes.Result { assert.JSONEq(string(expectedGood[index].Data), string(value.Data)) assert.Equal(expectedGood[index].PartitionKey, value.PartitionKey) assert.NotNil(expectedGood[index].TimeTransformed) - - // assertions to ensure we don't accidentally modify the input - assert.NotEqual(Messages[index].Data, value.Data) - // assert can't seem to deal with comparing zero value to non-zero value, so assert that it's still zero instead - assert.Equal(time.Time{}, Messages[index].TimeTransformed) } // Not matching equivalence of whole object because error stacktrace makes it unfeasible. Doing each component part instead. @@ -95,9 +127,28 @@ func TestNewTransformation_EnrichedToJson(t *testing.T) { } func Benchmark_Transform_EnrichToJson(b *testing.B) { + // Because we modify input, copy this first + Msgs := []*models.Message{ + { + Data: SnowplowTsv1, + PartitionKey: "some-key", + }, + { + Data: SnowplowTsv2, + PartitionKey: "some-key1", + }, + { + Data: SnowplowTsv3, + PartitionKey: "some-key2", + }, + { + Data: nonSnowplowString, + PartitionKey: "some-key4", + }, + } tranformEnrichJSON := NewTransformation(SpEnrichedToJSON) for i := 0; i < b.N; i++ { - tranformEnrichJSON(Messages) + tranformEnrichJSON(Msgs) } } @@ -106,15 +157,54 @@ func testfunc(message *models.Message, intermediateState interface{}) (*models.M } func Benchmark_Transform_Passthrough(b *testing.B) { + // Because transform funcs modify input, copy this first + Msgs := []*models.Message{ + { + Data: SnowplowTsv1, + PartitionKey: "some-key", + }, + { + Data: SnowplowTsv2, + PartitionKey: "some-key1", + }, + { + Data: SnowplowTsv3, + PartitionKey: "some-key2", + }, + { + Data: nonSnowplowString, + PartitionKey: "some-key4", + }, + } tranformPassthrough := NewTransformation(testfunc) for i := 0; i < b.N; i++ { - tranformPassthrough(Messages) + tranformPassthrough(Msgs) } } func TestNewTransformation_Multiple(t *testing.T) { assert := assert.New(t) + // Because transform funcs modify input, copy this first + Msgs := []*models.Message{ + { + Data: SnowplowTsv1, + PartitionKey: "some-key", + }, + { + Data: SnowplowTsv2, + PartitionKey: "some-key1", + }, + { + Data: SnowplowTsv3, + PartitionKey: "some-key2", + }, + { + Data: nonSnowplowString, + PartitionKey: "some-key4", + }, + } + var expectedGood = []*models.Message{ { Data: snowplowJSON1, @@ -133,19 +223,13 @@ func TestNewTransformation_Multiple(t *testing.T) { setPkToAppID, _ := NewSpEnrichedSetPkFunction("app_id") tranformMultiple := NewTransformation(setPkToAppID, SpEnrichedToJSON) - enrichJSONRes := tranformMultiple(Messages) + enrichJSONRes := tranformMultiple(Msgs) for index, value := range enrichJSONRes.Result { assert.JSONEq(string(expectedGood[index].Data), string(value.Data)) assert.Equal(expectedGood[index].PartitionKey, value.PartitionKey) assert.NotNil(expectedGood[index].TimeTransformed) assert.NotNil(value.TimeTransformed) - - // assertions to ensure we don't accidentally modify the input - assert.NotEqual(Messages[index].Data, value.Data) - assert.NotEqual(Messages[index].PartitionKey, value.PartitionKey) - // assert can't seem to deal with comparing zero value to non-zero value, so assert that it's still zero instead - assert.Equal(time.Time{}, Messages[index].TimeTransformed) } // Not matching equivalence of whole object because error stacktrace makes it unfeasible. Doing each component part instead.