-
Notifications
You must be signed in to change notification settings - Fork 136
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix S3 to SQL database types (#2911)
- Loading branch information
1 parent
db1c628
commit b063df0
Showing
7 changed files
with
208 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
package neosync_benthos_sql | ||
|
||
import ( | ||
"context" | ||
"encoding/binary" | ||
"encoding/json" | ||
"strconv" | ||
"strings" | ||
|
||
"github.com/warpstreamlabs/bento/public/service" | ||
) | ||
|
||
func jsonToSqlProcessorConfig() *service.ConfigSpec { | ||
return service.NewConfigSpec().Field(service.NewStringMapField("column_data_types")) | ||
} | ||
|
||
func RegisterJsonToSqlProcessor(env *service.Environment) error { | ||
return env.RegisterBatchProcessor( | ||
"json_to_sql", | ||
jsonToSqlProcessorConfig(), | ||
func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { | ||
proc, err := newJsonToSqlProcessor(conf, mgr) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return proc, nil | ||
}) | ||
} | ||
|
||
type jsonToSqlProcessor struct { | ||
logger *service.Logger | ||
columnDataTypes map[string]string // column name to datatype | ||
} | ||
|
||
func newJsonToSqlProcessor(conf *service.ParsedConfig, mgr *service.Resources) (*jsonToSqlProcessor, error) { | ||
columnDataTypes, err := conf.FieldStringMap("column_data_types") | ||
if err != nil { | ||
return nil, err | ||
} | ||
return &jsonToSqlProcessor{ | ||
logger: mgr.Logger(), | ||
columnDataTypes: columnDataTypes, | ||
}, nil | ||
} | ||
|
||
func (p *jsonToSqlProcessor) ProcessBatch(ctx context.Context, batch service.MessageBatch) ([]service.MessageBatch, error) { | ||
newBatch := make(service.MessageBatch, 0, len(batch)) | ||
for _, msg := range batch { | ||
root, err := msg.AsStructuredMut() | ||
if err != nil { | ||
return nil, err | ||
} | ||
newRoot := p.transform("", root) | ||
newMsg := msg.Copy() | ||
newMsg.SetStructured(newRoot) | ||
newBatch = append(newBatch, newMsg) | ||
} | ||
|
||
if len(newBatch) == 0 { | ||
return nil, nil | ||
} | ||
return []service.MessageBatch{newBatch}, nil | ||
} | ||
|
||
func (m *jsonToSqlProcessor) Close(context.Context) error { | ||
return nil | ||
} | ||
|
||
// [bigint binary blob char date datetime decimal double enum float int int json longblob longtext mediumblob mediumint mediumtext set set smallint text time timestamp tinyblob tinyint tinytext varbinary varchar year] | ||
func (p *jsonToSqlProcessor) transform(path string, root any) any { | ||
switch v := root.(type) { | ||
case map[string]any: | ||
newMap := make(map[string]any) | ||
for k, v2 := range v { | ||
newValue := p.transform(k, v2) | ||
newMap[k] = newValue | ||
} | ||
return newMap | ||
case nil: | ||
return v | ||
case []byte: | ||
datatype, ok := p.columnDataTypes[path] | ||
if !ok { | ||
return v | ||
} | ||
if strings.EqualFold(datatype, "bit") { | ||
bit, err := convertStringToBit(string(v)) | ||
if err != nil { | ||
p.logger.Errorf("unable to convert bit string to SQL bit []byte: %w", err) | ||
return v | ||
} | ||
return bit | ||
} else if strings.EqualFold(datatype, "json") { | ||
validJson, err := getValidJson(v) | ||
if err != nil { | ||
p.logger.Errorf("unable to get valid json: %w", err) | ||
return v | ||
} | ||
return validJson | ||
} | ||
return v | ||
default: | ||
return v | ||
} | ||
} | ||
|
||
// getValidJson returns jsonData unchanged when it is already valid JSON.
// Otherwise the bytes are treated as a bare (unquoted) string and returned
// encoded as a JSON string literal.
func getValidJson(jsonData []byte) ([]byte, error) {
	if !json.Valid(jsonData) {
		// Not parseable as JSON: quote and escape it as a JSON string.
		return json.Marshal(string(jsonData))
	}
	return jsonData, nil
}
|
||
// convertStringToBit parses bitString as a string of binary digits and returns
// its value as big-endian bytes, using the minimum number of whole bytes that
// can hold len(bitString) bits (leading zero digits count toward the width).
//
// The error from strconv.ParseUint is returned for input that is empty,
// contains characters other than '0' and '1', or is longer than 64 digits.
func convertStringToBit(bitString string) ([]byte, error) {
	width := len(bitString)

	// Using the digit count as the bit size means any well-formed input of up
	// to 64 digits fits, while longer input is rejected by ParseUint itself.
	value, err := strconv.ParseUint(bitString, 2, width)
	if err != nil {
		return nil, err
	}

	// Encode into a full 8-byte buffer, then keep only the trailing bytes
	// needed for the requested width.
	var encoded [8]byte
	binary.BigEndian.PutUint64(encoded[:], value)

	byteCount := (width + 7) / 8
	return encoded[len(encoded)-byteCount:], nil
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
package neosync_benthos_sql | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func Test_convertStringToBit(t *testing.T) { | ||
t.Run("8 bits", func(t *testing.T) { | ||
got, err := convertStringToBit("10101010") | ||
require.NoError(t, err) | ||
expected := []byte{170} | ||
require.Equalf(t, expected, got, "got %v, want %v", got, expected) | ||
}) | ||
|
||
t.Run("1 bit", func(t *testing.T) { | ||
got, err := convertStringToBit("1") | ||
require.NoError(t, err) | ||
expected := []byte{1} | ||
require.Equalf(t, expected, got, "got %v, want %v", got, expected) | ||
}) | ||
|
||
t.Run("16 bits", func(t *testing.T) { | ||
got, err := convertStringToBit("1010101010101010") | ||
require.NoError(t, err) | ||
expected := []byte{170, 170} | ||
require.Equalf(t, expected, got, "got %v, want %v", got, expected) | ||
}) | ||
|
||
t.Run("24 bits", func(t *testing.T) { | ||
got, err := convertStringToBit("101010101111111100000000") | ||
require.NoError(t, err) | ||
expected := []byte{170, 255, 0} | ||
require.Equalf(t, expected, got, "got %v, want %v", got, expected) | ||
}) | ||
|
||
t.Run("invalid binary string", func(t *testing.T) { | ||
_, err := convertStringToBit("102") | ||
require.Error(t, err) | ||
}) | ||
|
||
t.Run("empty string", func(t *testing.T) { | ||
_, err := convertStringToBit("") | ||
require.Error(t, err) | ||
}) | ||
} |