From 9a06f7e4d29dc7896b46650982a16a049938b99e Mon Sep 17 00:00:00 2001 From: alishakawaguchi Date: Thu, 31 Oct 2024 10:29:22 -0700 Subject: [PATCH] Updates CLI and Worker to use shared benthos builder (#2882) --- backend/pkg/sqlmanager/sql-manager.go | 2 +- cli/internal/cmds/neosync/sync/config.go | 6 +- cli/internal/cmds/neosync/sync/dynamodb.go | 86 - cli/internal/cmds/neosync/sync/job.go | 108 + cli/internal/cmds/neosync/sync/sync.go | 531 ++--- .../neosync/sync/sync_integration_test.go | 22 + cli/internal/cmds/neosync/sync/sync_test.go | 142 -- cli/internal/cmds/neosync/sync/ui.go | 11 +- cli/internal/cmds/neosync/sync/util.go | 30 +- go.mod | 2 +- .../benthos-builder/benthos-builder.go | 324 +++ .../benthos-builder/builders/aws-s3.go | 179 ++ .../builders/benthos-builder_test.go | 1537 ++++++++++++++ .../benthos-builder/builders/dynamodb.go | 129 +- .../benthos-builder/builders/dynamodb_test.go | 2 +- .../builders/gcp-cloud-storage.go | 88 + .../benthos-builder/builders/generate-ai.go | 145 +- .../benthos-builder/builders}/generate.go | 194 +- .../benthos-builder/builders/mongodb.go | 89 +- .../builders/neosync-connection-data.go | 102 + .../benthos-builder/builders}/processors.go | 78 +- .../builders}/processors_test.go | 75 +- .../benthos-builder/builders/sql-util.go | 577 ++++++ .../benthos/benthos-builder/builders/sql.go | 471 +++++ .../benthos-builder/builders/sql_test.go | 90 +- .../benthos-builder/generate-benthos.go | 150 ++ .../benthos/benthos-builder/internal/types.go | 158 ++ .../benthos/benthos-builder/shared/types.go | 37 + .../testutil/testcontainers/redis/redis.go | 58 + .../mysql/humanresources/create-tables.sql | 29 + .../postgres/humanresources/create-tables.sql | 20 + worker/pkg/benthos/config.go | 2 - .../neosync_connection_data_input.go | 6 +- worker/pkg/benthos/sql/output_sql_insert.go | 10 +- worker/pkg/query-builder2/wrapper.go | 26 + .../gen-benthos-configs/activity.go | 35 +- .../gen-benthos-configs/benthos-builder.go | 460 +--- .../benthos-builder_test.go | 1615 +------------- .../activities/gen-benthos-configs/sync.go | 1168 ----------- .../activities/gen-benthos-configs/utils.go | 57 - .../gen-benthos-configs/utils_test.go | 70 - .../datasync/activities/sync/activity.go | 5 +- .../workflows/datasync/workflow/workflow.go | 31 +- .../datasync/workflow/workflow_test.go | 1846 ++++++++--------- 44 files changed, 5814 insertions(+), 4989 deletions(-) create mode 100644 cli/internal/cmds/neosync/sync/job.go delete mode 100644 cli/internal/cmds/neosync/sync/sync_test.go create mode 100644 internal/benthos/benthos-builder/benthos-builder.go create mode 100644 internal/benthos/benthos-builder/builders/aws-s3.go create mode 100644 internal/benthos/benthos-builder/builders/benthos-builder_test.go rename worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-dynamodb.go => internal/benthos/benthos-builder/builders/dynamodb.go (54%) rename worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-dynamodb_test.go => internal/benthos/benthos-builder/builders/dynamodb_test.go (98%) create mode 100644 internal/benthos/benthos-builder/builders/gcp-cloud-storage.go rename worker/pkg/workflows/datasync/activities/gen-benthos-configs/ai-generate.go => internal/benthos/benthos-builder/builders/generate-ai.go (56%) rename {worker/pkg/workflows/datasync/activities/gen-benthos-configs => internal/benthos/benthos-builder/builders}/generate.go (55%) rename worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-mongo.go =>
internal/benthos/benthos-builder/builders/mongodb.go (50%) create mode 100644 internal/benthos/benthos-builder/builders/neosync-connection-data.go rename {worker/pkg/workflows/datasync/activities/gen-benthos-configs => internal/benthos/benthos-builder/builders}/processors.go (92%) rename {worker/pkg/workflows/datasync/activities/gen-benthos-configs => internal/benthos/benthos-builder/builders}/processors_test.go (80%) create mode 100644 internal/benthos/benthos-builder/builders/sql-util.go create mode 100644 internal/benthos/benthos-builder/builders/sql.go rename worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync_test.go => internal/benthos/benthos-builder/builders/sql_test.go (59%) create mode 100644 internal/benthos/benthos-builder/generate-benthos.go create mode 100644 internal/benthos/benthos-builder/internal/types.go create mode 100644 internal/benthos/benthos-builder/shared/types.go create mode 100644 internal/testutil/testcontainers/redis/redis.go create mode 100644 worker/pkg/query-builder2/wrapper.go delete mode 100644 worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync.go delete mode 100644 worker/pkg/workflows/datasync/activities/gen-benthos-configs/utils.go delete mode 100644 worker/pkg/workflows/datasync/activities/gen-benthos-configs/utils_test.go diff --git a/backend/pkg/sqlmanager/sql-manager.go b/backend/pkg/sqlmanager/sql-manager.go index cd26f6bb9b..157a68c5cd 100644 --- a/backend/pkg/sqlmanager/sql-manager.go +++ b/backend/pkg/sqlmanager/sql-manager.go @@ -364,7 +364,7 @@ func (s *SqlManager) NewSqlDbFromUrl( func GetColumnOverrideAndResetProperties(driver string, cInfo *sqlmanager_shared.ColumnInfo) (needsOverride, needsReset bool, err error) { switch driver { - case sqlmanager_shared.PostgresDriver: + case sqlmanager_shared.PostgresDriver, "postgres": needsOverride, needsReset := sqlmanager_postgres.GetPostgresColumnOverrideAndResetProperties(cInfo) return needsOverride, needsReset, nil case sqlmanager_shared.MysqlDriver: diff --git a/cli/internal/cmds/neosync/sync/config.go b/cli/internal/cmds/neosync/sync/config.go index 977273f507..883de1efdd 100644 --- a/cli/internal/cmds/neosync/sync/config.go +++ b/cli/internal/cmds/neosync/sync/config.go @@ -148,8 +148,8 @@ func buildCmdConfig(cmd *cobra.Command) (*cmdConfig, error) { if err != nil { return nil, err } - config.Debug = debug + config.Debug = debug if cmd.Flags().Changed("destination-open-limit") { openLimit, err := cmd.Flags().GetInt32("destination-open-limit") if err != nil { @@ -196,6 +196,10 @@ func isConfigValid(cmd *cmdConfig, logger *slog.Logger, sourceConnection *mgmtv1 return errors.New("GCP Cloud Storage source connection type requires job-id or job-run-id") } + if (sourceConnectionType == awsS3Connection || sourceConnectionType == gcpCloudStorageConnection) && cmd.Destination.InitSchema { + return errors.New("init schema is only supported when source is a SQL Database") + } + if cmd.Destination.TruncateCascade && cmd.Destination.Driver == mysqlDriver { return fmt.Errorf("truncate cascade is only supported in postgres") } diff --git a/cli/internal/cmds/neosync/sync/dynamodb.go b/cli/internal/cmds/neosync/sync/dynamodb.go index 81a7923c96..da85ce96cd 100644 --- a/cli/internal/cmds/neosync/sync/dynamodb.go +++ b/cli/internal/cmds/neosync/sync/dynamodb.go @@ -1,8 +1,6 @@ package sync_cmd import ( - tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" - neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" "github.com/spf13/cobra" ) @@ 
-59,87 +57,3 @@ func buildAwsCredConfig(cmd *cobra.Command, config *cmdConfig) (*cmdConfig, erro } return config, nil } - -func generateDynamoDbBenthosConfig( - cmd *cmdConfig, - table string, -) *benthosConfigResponse { - bc := &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Logger: &neosync_benthos.LoggerConfig{ - Level: "ERROR", - AddTimestamp: true, - }, - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - NeosyncConnectionData: &neosync_benthos.NeosyncConnectionData{ - // ApiKey: authToken, - // ApiUrl: apiUrl, - ConnectionId: cmd.Source.ConnectionId, - ConnectionType: string(awsDynamoDBConnection), - Schema: "dynamodb", - Table: table, - }, - }, - }, - Pipeline: &neosync_benthos.PipelineConfig{}, - Output: &neosync_benthos.OutputConfig{ - Outputs: neosync_benthos.Outputs{ - AwsDynamoDB: &neosync_benthos.OutputAwsDynamoDB{ - Table: table, - JsonMapColumns: map[string]string{ - "": ".", - }, - - Batching: &neosync_benthos.Batching{ - // https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html - // A single call to BatchWriteItem can transmit up to 16MB of data over the network, consisting of up to 25 item put or delete operations - // Specifying the count here may not be enough if the overall data is above 16MB. - // Benthos will fall back on error to single writes however - Period: "5s", - Count: 25, - }, - - Region: cmd.AwsDynamoDbDestination.AwsCredConfig.Region, - Endpoint: *cmd.AwsDynamoDbDestination.AwsCredConfig.Endpoint, - Credentials: buildBenthosAwsCredentials(cmd), - }, - }, - }, - }, - } - return &benthosConfigResponse{ - Name: table, - Config: bc, - DependsOn: []*tabledependency.DependsOn{}, - Table: table, - Columns: []string{}, - } -} - -func buildBenthosAwsCredentials(cmd *cmdConfig) *neosync_benthos.AwsCredentials { - if cmd.AwsDynamoDbDestination == nil || cmd.AwsDynamoDbDestination.AwsCredConfig == nil { - return nil - } - cc := cmd.AwsDynamoDbDestination.AwsCredConfig - creds := &neosync_benthos.AwsCredentials{} - if cc.Profile != nil { - creds.Profile = *cc.Profile - } - if cc.AccessKeyID != nil { - creds.Id = *cc.AccessKeyID - } - if cc.SecretAccessKey != nil { - creds.Secret = *cc.SecretAccessKey - } - if cc.SessionToken != nil { - creds.Token = *cc.SessionToken - } - if cc.RoleARN != nil { - creds.Role = *cc.RoleARN - } - if cc.RoleExternalID != nil { - creds.RoleExternalId = *cc.RoleExternalID - } - return creds -} diff --git a/cli/internal/cmds/neosync/sync/job.go b/cli/internal/cmds/neosync/sync/job.go new file mode 100644 index 0000000000..046e877fc2 --- /dev/null +++ b/cli/internal/cmds/neosync/sync/job.go @@ -0,0 +1,108 @@ +package sync_cmd + +import ( + "fmt" + + "github.com/google/uuid" + mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" +) + +func createJob( + cmd *cmdConfig, + sourceConnection *mgmtv1alpha1.Connection, + destinationConnection *mgmtv1alpha1.Connection, + sourceSchema []*mgmtv1alpha1.DatabaseColumn, +) (*mgmtv1alpha1.Job, error) { + sourceConnOpts, err := toJobSourceOption(sourceConnection) + if err != nil { + return nil, err + } + jobId := uuid.NewString() + if cmd.Source.ConnectionOpts != nil && cmd.Source.ConnectionOpts.JobId != nil && *cmd.Source.ConnectionOpts.JobId != "" { + jobId = *cmd.Source.ConnectionOpts.JobId + } + return &mgmtv1alpha1.Job{ + Id: jobId, + Name: "cli-sync", + AccountId: *cmd.AccountId, + Source: &mgmtv1alpha1.JobSource{ + Options: sourceConnOpts, + }, + Destinations: 
[]*mgmtv1alpha1.JobDestination{toJobDestination(cmd, destinationConnection)}, + Mappings: toJobMappings(sourceSchema), + }, nil +} + +func toJobDestination(cmd *cmdConfig, destinationConnection *mgmtv1alpha1.Connection) *mgmtv1alpha1.JobDestination { + return &mgmtv1alpha1.JobDestination{ + ConnectionId: destinationConnection.Id, + Id: uuid.NewString(), + Options: cmdConfigToDestinationConnectionOptions(cmd), + } +} + +func toJobSourceOption(sourceConnection *mgmtv1alpha1.Connection) (*mgmtv1alpha1.JobSourceOptions, error) { + switch sourceConnection.ConnectionConfig.Config.(type) { + case *mgmtv1alpha1.ConnectionConfig_PgConfig: + return &mgmtv1alpha1.JobSourceOptions{ + Config: &mgmtv1alpha1.JobSourceOptions_Postgres{ + Postgres: &mgmtv1alpha1.PostgresSourceConnectionOptions{ + ConnectionId: sourceConnection.Id, + }, + }, + }, nil + case *mgmtv1alpha1.ConnectionConfig_MysqlConfig: + return &mgmtv1alpha1.JobSourceOptions{ + Config: &mgmtv1alpha1.JobSourceOptions_Mysql{ + Mysql: &mgmtv1alpha1.MysqlSourceConnectionOptions{ + ConnectionId: sourceConnection.Id, + }, + }, + }, nil + case *mgmtv1alpha1.ConnectionConfig_AwsS3Config: + return &mgmtv1alpha1.JobSourceOptions{ + Config: &mgmtv1alpha1.JobSourceOptions_AwsS3{ + AwsS3: &mgmtv1alpha1.AwsS3SourceConnectionOptions{ + ConnectionId: sourceConnection.Id, + }, + }, + }, nil + default: + return nil, fmt.Errorf("unsupported connection type") + } +} + +// if the column is generated and not an identity column, set the transformer to generate default +func toJobMappings(sourceSchema []*mgmtv1alpha1.DatabaseColumn) []*mgmtv1alpha1.JobMapping { + mappings := []*mgmtv1alpha1.JobMapping{} + + for _, colInfo := range sourceSchema { + mappings = append(mappings, &mgmtv1alpha1.JobMapping{ + Schema: colInfo.Schema, + Table: colInfo.Table, + Column: colInfo.Column, + Transformer: toTransformer(colInfo), + }) + } + + return mappings +} + +func toTransformer(colInfo *mgmtv1alpha1.DatabaseColumn) *mgmtv1alpha1.JobMappingTransformer { + if colInfo.GeneratedType != nil && colInfo.GetGeneratedType() != "" { + return &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig{ + GenerateDefaultConfig: &mgmtv1alpha1.GenerateDefault{}, + }, + }, + } + } + return &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{ + PassthroughConfig: &mgmtv1alpha1.Passthrough{}, + }, + }, + } +} diff --git a/cli/internal/cmds/neosync/sync/sync.go b/cli/internal/cmds/neosync/sync/sync.go index 327e388a9e..7923b6214a 100644 --- a/cli/internal/cmds/neosync/sync/sync.go +++ b/cli/internal/cmds/neosync/sync/sync.go @@ -27,18 +27,17 @@ import ( tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" "github.com/nucleuscloud/neosync/cli/internal/auth" "github.com/nucleuscloud/neosync/cli/internal/output" + benthosbuilder "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder" connectiontunnelmanager "github.com/nucleuscloud/neosync/internal/connection-tunnel-manager" pool_sql_provider "github.com/nucleuscloud/neosync/internal/connection-tunnel-manager/pool/providers/sql" "github.com/nucleuscloud/neosync/internal/connection-tunnel-manager/providers" "github.com/nucleuscloud/neosync/internal/connection-tunnel-manager/providers/mongoprovider" "github.com/nucleuscloud/neosync/internal/connection-tunnel-manager/providers/sqlprovider" - neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos"
"github.com/spf13/cobra" "golang.org/x/sync/errgroup" "gopkg.in/yaml.v2" benthos_environment "github.com/nucleuscloud/neosync/worker/pkg/benthos/environment" - _ "github.com/nucleuscloud/neosync/worker/pkg/benthos/sql" "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" "github.com/warpstreamlabs/bento/public/bloblang" _ "github.com/warpstreamlabs/bento/public/components/aws" @@ -109,6 +108,7 @@ type sqlDestinationConfig struct { OnConflict onConflictConfig `yaml:"on-conflict,omitempty"` ConnectionOpts sqlConnectionOptions `yaml:"connection-opts,omitempty"` } + type sqlConnectionOptions struct { OpenLimit *int32 `yaml:"open-limit,omitempty"` IdleLimit *int32 `yaml:"idle-limit,omitempty"` @@ -183,14 +183,17 @@ func NewCmd() *cobra.Command { } type clisync struct { - connectiondataclient mgmtv1alpha1connect.ConnectionDataServiceClient - connectionclient mgmtv1alpha1connect.ConnectionServiceClient - sqlmanagerclient *sqlmanager.SqlManager - sqlconnector *sqlconnect.SqlOpenConnector - benv *service.Environment - cmd *cmdConfig - logger *slog.Logger - ctx context.Context + connectiondataclient mgmtv1alpha1connect.ConnectionDataServiceClient + connectionclient mgmtv1alpha1connect.ConnectionServiceClient + transformerclient mgmtv1alpha1connect.TransformersServiceClient + sqlmanagerclient *sqlmanager.SqlManager + sqlconnector *sqlconnect.SqlOpenConnector + benv *service.Environment + sourceConnection *mgmtv1alpha1.Connection + destinationConnection *mgmtv1alpha1.Connection + cmd *cmdConfig + logger *slog.Logger + ctx context.Context } func sync( @@ -219,6 +222,7 @@ func sync( connectInterceptorOption := connect.WithInterceptors(connectInterceptors...) connectionclient := mgmtv1alpha1connect.NewConnectionServiceClient(httpclient, neosyncurl, connectInterceptorOption) connectiondataclient := mgmtv1alpha1connect.NewConnectionDataServiceClient(httpclient, neosyncurl, connectInterceptorOption) + transformerclient := mgmtv1alpha1connect.NewTransformersServiceClient(httpclient, neosyncurl, connectInterceptorOption) pgpoolmap := &syncmap.Map{} mysqlpoolmap := &syncmap.Map{} @@ -232,6 +236,7 @@ func sync( sync := &clisync{ connectiondataclient: connectiondataclient, connectionclient: connectionclient, + transformerclient: transformerclient, sqlmanagerclient: sqlmanagerclient, sqlconnector: sqlConnector, cmd: cmd, @@ -243,6 +248,16 @@ func sync( } func (c *clisync) configureAndRunSync() error { + c.logger.Debug("Retrieving neosync connection") + connResp, err := c.connectionclient.GetConnection(c.ctx, connect.NewRequest(&mgmtv1alpha1.GetConnectionRequest{ + Id: c.cmd.Source.ConnectionId, + })) + if err != nil { + return err + } + sourceConnection := connResp.Msg.GetConnection() + c.sourceConnection = sourceConnection + connectionprovider := providers.NewProvider( mongoprovider.NewProvider(), sqlprovider.NewProvider(c.sqlconnector), @@ -261,6 +276,8 @@ func (c *clisync) configureAndRunSync() error { sqlDsn = c.cmd.Destination.ConnectionUrl } dsnToConnIdMap.Store(sqlDsn, destConnection.Id) + dsnToConnIdMap.Store(sourceConnection.Id, sourceConnection.Id) + dsnToConnIdMap.Store(destConnection.Id, destConnection.Id) stopChan := make(chan error, 3) ctx, cancel := context.WithCancel(c.ctx) defer cancel() @@ -282,7 +299,8 @@ func (c *clisync) configureAndRunSync() error { tunnelmanager, dsnToConnIdMap, map[string]*mgmtv1alpha1.Connection{ - destConnection.Id: destConnection, + destConnection.Id: destConnection, + sourceConnection.Id: sourceConnection, }, session, c.logger, @@ -299,6 
+317,7 @@ func (c *clisync) configureAndRunSync() error { return err } c.benv = benthosEnv + c.destinationConnection = destConnection groupedConfigs, err := c.configureSync() if err != nil { @@ -311,134 +330,28 @@ func (c *clisync) configureAndRunSync() error { return runSync(c.ctx, *c.cmd.OutputType, c.benv, groupedConfigs, c.logger) } -func (c *clisync) configureSync() ([][]*benthosConfigResponse, error) { - c.logger.Debug("Retrieving neosync connection") - connResp, err := c.connectionclient.GetConnection(c.ctx, connect.NewRequest(&mgmtv1alpha1.GetConnectionRequest{ - Id: c.cmd.Source.ConnectionId, - })) - if err != nil { - return nil, err - } - sourceConnection := connResp.Msg.GetConnection() - sourceConnectionType, err := getConnectionType(sourceConnection) +func (c *clisync) configureSync() ([][]*benthosbuilder.BenthosConfigResponse, error) { + sourceConnectionType, err := getConnectionType(c.sourceConnection) if err != nil { return nil, err } c.logger.Debug(fmt.Sprintf("Source connection type: %s", sourceConnectionType)) - err = isConfigValid(c.cmd, c.logger, sourceConnection, sourceConnectionType) + err = isConfigValid(c.cmd, c.logger, c.sourceConnection, sourceConnectionType) if err != nil { return nil, err } c.logger.Debug("Validated config") c.logger.Info("Retrieving connection schema...") - var schemaConfig *schemaConfig - switch sourceConnectionType { - case awsS3Connection: - c.logger.Info("Building schema and table constraints...") - var cfg *mgmtv1alpha1.AwsS3SchemaConfig - if c.cmd.Source.ConnectionOpts.JobRunId != nil && *c.cmd.Source.ConnectionOpts.JobRunId != "" { - cfg = &mgmtv1alpha1.AwsS3SchemaConfig{Id: &mgmtv1alpha1.AwsS3SchemaConfig_JobRunId{JobRunId: *c.cmd.Source.ConnectionOpts.JobRunId}} - } else if c.cmd.Source.ConnectionOpts.JobId != nil && *c.cmd.Source.ConnectionOpts.JobId != "" { - cfg = &mgmtv1alpha1.AwsS3SchemaConfig{Id: &mgmtv1alpha1.AwsS3SchemaConfig_JobId{JobId: *c.cmd.Source.ConnectionOpts.JobId}} - } - s3Config := &mgmtv1alpha1.ConnectionSchemaConfig{ - Config: &mgmtv1alpha1.ConnectionSchemaConfig_AwsS3Config{ - AwsS3Config: cfg, - }, - } - - schemaCfg, err := c.getDestinationSchemaConfig(sourceConnection, s3Config) - if err != nil { - return nil, err - } - if len(schemaCfg.Schemas) == 0 { - c.logger.Warn("No tables found when building destination schema from s3.") - return nil, nil - } - schemaConfig = schemaCfg - case gcpCloudStorageConnection: - var cfg *mgmtv1alpha1.GcpCloudStorageSchemaConfig - if c.cmd.Source.ConnectionOpts.JobRunId != nil && *c.cmd.Source.ConnectionOpts.JobRunId != "" { - cfg = &mgmtv1alpha1.GcpCloudStorageSchemaConfig{Id: &mgmtv1alpha1.GcpCloudStorageSchemaConfig_JobRunId{JobRunId: *c.cmd.Source.ConnectionOpts.JobRunId}} - } else if c.cmd.Source.ConnectionOpts.JobId != nil && *c.cmd.Source.ConnectionOpts.JobId != "" { - cfg = &mgmtv1alpha1.GcpCloudStorageSchemaConfig{Id: &mgmtv1alpha1.GcpCloudStorageSchemaConfig_JobId{JobId: *c.cmd.Source.ConnectionOpts.JobId}} - } - gcpConfig := &mgmtv1alpha1.ConnectionSchemaConfig{ - Config: &mgmtv1alpha1.ConnectionSchemaConfig_GcpCloudstorageConfig{ - GcpCloudstorageConfig: cfg, - }, - } - - schemaCfg, err := c.getDestinationSchemaConfig(sourceConnection, gcpConfig) - if err != nil { - return nil, err - } - if len(schemaCfg.Schemas) == 0 { - c.logger.Warn("No tables found when building destination schema from gcp cloud storage.") - return nil, nil - } - schemaConfig = schemaCfg - case mysqlConnection: - c.logger.Info("Building schema and table constraints...") - mysqlCfg := 
&mgmtv1alpha1.ConnectionSchemaConfig{ - Config: &mgmtv1alpha1.ConnectionSchemaConfig_MysqlConfig{ - MysqlConfig: &mgmtv1alpha1.MysqlSchemaConfig{}, - }, - } - schemaCfg, err := c.getConnectionSchemaConfig(sourceConnection, mysqlCfg) - if err != nil { - return nil, err - } - if len(schemaCfg.Schemas) == 0 { - c.logger.Warn("No tables found when building destination schema from mysql.") - return nil, nil - } - schemaConfig = schemaCfg - case postgresConnection: - c.logger.Info("Building schema and table constraints...") - postgresConfig := &mgmtv1alpha1.ConnectionSchemaConfig{ - Config: &mgmtv1alpha1.ConnectionSchemaConfig_PgConfig{ - PgConfig: &mgmtv1alpha1.PostgresSchemaConfig{}, - }, - } - schemaCfg, err := c.getConnectionSchemaConfig(sourceConnection, postgresConfig) - if err != nil { - return nil, err - } - if len(schemaCfg.Schemas) == 0 { - c.logger.Warn("No tables found when building destination schema from postgres.") - return nil, nil - } - schemaConfig = schemaCfg - case awsDynamoDBConnection: - dynamoConfig := &mgmtv1alpha1.ConnectionSchemaConfig{ - Config: &mgmtv1alpha1.ConnectionSchemaConfig_DynamodbConfig{ - DynamodbConfig: &mgmtv1alpha1.DynamoDBSchemaConfig{}, - }, - } - schemaCfg, err := c.getConnectionSchemaConfig(sourceConnection, dynamoConfig) - if err != nil { - return nil, err - } - if len(schemaCfg.Schemas) == 0 { - c.logger.Warn("No tables found when building destination schema from dynamodb.") - return nil, nil - } - tableMap := map[string]struct{}{} - for _, s := range schemaCfg.Schemas { - tableMap[s.Table] = struct{}{} - } - configs := []*benthosConfigResponse{} - for t := range tableMap { - benthosConfig := generateDynamoDbBenthosConfig(c.cmd, t) - configs = append(configs, benthosConfig) - } - return [][]*benthosConfigResponse{configs}, nil - default: - return nil, fmt.Errorf("this connection type is not currently supported: %T", sourceConnectionType) + schemaConfig, err := c.getConnectionSchemaConfig() + if err != nil { + return nil, err + } + if len(schemaConfig.Schemas) == 0 { + c.logger.Warn("No tables found when building schema from source") + return nil, nil } c.logger.Debug("Building sync configs") @@ -454,10 +367,47 @@ func (c *clisync) configureSync() ([][]*benthosConfigResponse, error) { syncConfigCount := len(syncConfigs) c.logger.Info(fmt.Sprintf("Generating %d sync configs...", syncConfigCount)) - configs := []*benthosConfigResponse{} - for _, cfg := range syncConfigs { - benthosConfig := generateBenthosConfig(c.cmd, sourceConnectionType, cfg) - configs = append(configs, benthosConfig) + + job, err := createJob(c.cmd, c.sourceConnection, c.destinationConnection, schemaConfig.Schemas) + if err != nil { + c.logger.Error("unable to create job") + return nil, err + } + + var jobRunId *string + if c.cmd.Source.ConnectionOpts != nil { + jobRunId = c.cmd.Source.ConnectionOpts.JobRunId + } + var databaseDriver *string + if c.cmd.Destination.Driver == postgresDriver { + d := string(c.cmd.Destination.Driver) + databaseDriver = &d + } + + // TODO move more logic to builders + benthosManagerConfig := &benthosbuilder.CliBenthosConfig{ + Job: job, + SourceConnection: c.sourceConnection, + SourceJobRunId: jobRunId, + DestinationConnection: c.destinationConnection, + SyncConfigs: syncConfigs, + RunId: "cli-sync", + Logger: c.logger, + Sqlmanagerclient: c.sqlmanagerclient, + Transformerclient: c.transformerclient, + Connectiondataclient: c.connectiondataclient, + RedisConfig: nil, + MetricsEnabled: false, + PostgresDriverOverride: databaseDriver, + } + bm, err := 
benthosbuilder.NewCliBenthosConfigManager(benthosManagerConfig) + if err != nil { + return nil, err + } + configs, err := bm.GenerateBenthosConfigs(c.ctx) + if err != nil { + c.logger.Error("unable to build benthos configs") + return nil, err } // order configs in run order by dependency @@ -467,12 +417,61 @@ func (c *clisync) configureSync() ([][]*benthosConfigResponse, error) { return groupedConfigs, nil } +func (c *clisync) getConnectionSchemaConfigByConnectionType(connection *mgmtv1alpha1.Connection) (*mgmtv1alpha1.ConnectionSchemaConfig, error) { + switch conn := connection.GetConnectionConfig().GetConfig().(type) { + case *mgmtv1alpha1.ConnectionConfig_PgConfig: + return &mgmtv1alpha1.ConnectionSchemaConfig{ + Config: &mgmtv1alpha1.ConnectionSchemaConfig_PgConfig{ + PgConfig: &mgmtv1alpha1.PostgresSchemaConfig{}, + }, + }, nil + case *mgmtv1alpha1.ConnectionConfig_MysqlConfig: + return &mgmtv1alpha1.ConnectionSchemaConfig{ + Config: &mgmtv1alpha1.ConnectionSchemaConfig_MysqlConfig{ + MysqlConfig: &mgmtv1alpha1.MysqlSchemaConfig{}, + }, + }, nil + case *mgmtv1alpha1.ConnectionConfig_DynamodbConfig: + return &mgmtv1alpha1.ConnectionSchemaConfig{ + Config: &mgmtv1alpha1.ConnectionSchemaConfig_DynamodbConfig{ + DynamodbConfig: &mgmtv1alpha1.DynamoDBSchemaConfig{}, + }, + }, nil + case *mgmtv1alpha1.ConnectionConfig_GcpCloudstorageConfig: + var cfg *mgmtv1alpha1.GcpCloudStorageSchemaConfig + if c.cmd.Source.ConnectionOpts.JobRunId != nil && *c.cmd.Source.ConnectionOpts.JobRunId != "" { + cfg = &mgmtv1alpha1.GcpCloudStorageSchemaConfig{Id: &mgmtv1alpha1.GcpCloudStorageSchemaConfig_JobRunId{JobRunId: *c.cmd.Source.ConnectionOpts.JobRunId}} + } else if c.cmd.Source.ConnectionOpts.JobId != nil && *c.cmd.Source.ConnectionOpts.JobId != "" { + cfg = &mgmtv1alpha1.GcpCloudStorageSchemaConfig{Id: &mgmtv1alpha1.GcpCloudStorageSchemaConfig_JobId{JobId: *c.cmd.Source.ConnectionOpts.JobId}} + } + return &mgmtv1alpha1.ConnectionSchemaConfig{ + Config: &mgmtv1alpha1.ConnectionSchemaConfig_GcpCloudstorageConfig{ + GcpCloudstorageConfig: cfg, + }, + }, nil + case *mgmtv1alpha1.ConnectionConfig_AwsS3Config: + var cfg *mgmtv1alpha1.AwsS3SchemaConfig + if c.cmd.Source.ConnectionOpts.JobRunId != nil && *c.cmd.Source.ConnectionOpts.JobRunId != "" { + cfg = &mgmtv1alpha1.AwsS3SchemaConfig{Id: &mgmtv1alpha1.AwsS3SchemaConfig_JobRunId{JobRunId: *c.cmd.Source.ConnectionOpts.JobRunId}} + } else if c.cmd.Source.ConnectionOpts.JobId != nil && *c.cmd.Source.ConnectionOpts.JobId != "" { + cfg = &mgmtv1alpha1.AwsS3SchemaConfig{Id: &mgmtv1alpha1.AwsS3SchemaConfig_JobId{JobId: *c.cmd.Source.ConnectionOpts.JobId}} + } + return &mgmtv1alpha1.ConnectionSchemaConfig{ + Config: &mgmtv1alpha1.ConnectionSchemaConfig_AwsS3Config{ + AwsS3Config: cfg, + }, + }, nil + default: + return nil, fmt.Errorf("unable to build connection schema config: unsupported connection type (%T)", conn) + } +} + var ( // Hack that locks the instanced bento stream builder build step that causes data races if done in parallel streamBuilderMu syncmap.Mutex ) -func syncData(ctx context.Context, benv *service.Environment, cfg *benthosConfigResponse, logger *slog.Logger, outputType output.OutputType) error { +func syncData(ctx context.Context, benv *service.Environment, cfg *benthosbuilder.BenthosConfigResponse, logger *slog.Logger, outputType output.OutputType) error { configbits, err := yaml.Marshal(cfg.Config) if err != nil { return err @@ -510,12 +509,20 @@ func syncData(ctx context.Context, benv *service.Environment, cfg *benthosConfig return 
fmt.Errorf("failed to create StreamBuilder") } if outputType == output.PlainOutput { - streambldr.SetLogger(logger.With("benthos", "true", "table", cfg.Table, "runType", runType)) + streambldr.SetLogger(logger.With("benthos", "true", "schema", cfg.TableSchema, "table", cfg.TableName, "runType", runType)) } if benv == nil { return fmt.Errorf("benthos env is nil") } + envKeyDsnSyncMap := syncmap.Map{} + for _, bdsn := range cfg.BenthosDsns { + envKeyDsnSyncMap.Store(bdsn.EnvVarKey, bdsn.ConnectionId) + } + + envKeyMap := syncMapToStringMap(&envKeyDsnSyncMap) + // This must come before SetYaml as otherwise it will not be invoked + streambldr.SetEnvVarLookupFunc(getEnvVarLookupFn(envKeyMap)) err = streambldr.SetYAML(string(configbits)) if err != nil { return fmt.Errorf("unable to convert benthos config to yaml for stream builder: %w", err) @@ -637,6 +644,80 @@ func cmdConfigToDestinationConnection(cmd *cmdConfig) *mgmtv1alpha1.Connection { return &mgmtv1alpha1.Connection{} } +func getEnvVarLookupFn(input map[string]string) func(key string) (string, bool) { + return func(key string) (string, bool) { + if input == nil { + return "", false + } + out, ok := input[key] + return out, ok + } +} + +func syncMapToStringMap(incoming *syncmap.Map) map[string]string { + out := map[string]string{} + if incoming == nil { + return out + } + + incoming.Range(func(key, value any) bool { + keyStr, ok := key.(string) + if !ok { + return true + } + valStr, ok := value.(string) + if !ok { + return true + } + out[keyStr] = valStr + return true + }) + return out +} + +func cmdConfigToDestinationConnectionOptions(cmd *cmdConfig) *mgmtv1alpha1.JobDestinationOptions { + if cmd.Destination != nil { + switch cmd.Destination.Driver { + case postgresDriver: + return &mgmtv1alpha1.JobDestinationOptions{ + Config: &mgmtv1alpha1.JobDestinationOptions_PostgresOptions{ + PostgresOptions: &mgmtv1alpha1.PostgresDestinationConnectionOptions{ + TruncateTable: &mgmtv1alpha1.PostgresTruncateTableConfig{ + TruncateBeforeInsert: cmd.Destination.TruncateBeforeInsert, + Cascade: cmd.Destination.TruncateCascade, + }, + InitTableSchema: cmd.Destination.InitSchema, + OnConflict: &mgmtv1alpha1.PostgresOnConflictConfig{ + DoNothing: cmd.Destination.OnConflict.DoNothing, + }, + }, + }, + } + case mysqlDriver: + return &mgmtv1alpha1.JobDestinationOptions{ + Config: &mgmtv1alpha1.JobDestinationOptions_MysqlOptions{ + MysqlOptions: &mgmtv1alpha1.MysqlDestinationConnectionOptions{ + TruncateTable: &mgmtv1alpha1.MysqlTruncateTableConfig{ + TruncateBeforeInsert: cmd.Destination.TruncateBeforeInsert, + }, + InitTableSchema: cmd.Destination.InitSchema, + OnConflict: &mgmtv1alpha1.MysqlOnConflictConfig{ + DoNothing: cmd.Destination.OnConflict.DoNothing, + }, + }, + }, + } + } + } else if cmd.AwsDynamoDbDestination != nil { + return &mgmtv1alpha1.JobDestinationOptions{ + Config: &mgmtv1alpha1.JobDestinationOptions_AwsS3Options{ + AwsS3Options: &mgmtv1alpha1.AwsS3DestinationConnectionOptions{}, + }, + } + } + return &mgmtv1alpha1.JobDestinationOptions{} +} + func (c *clisync) runDestinationInitStatements( syncConfigs []*tabledependency.RunConfig, schemaConfig *schemaConfig, @@ -784,103 +865,6 @@ func getTableInitStatementMap( return nil, nil } -type benthosConfigResponse struct { - Name string - DependsOn []*tabledependency.DependsOn - Config *neosync_benthos.BenthosConfig - Table string - Columns []string -} - -func generateBenthosConfig( - cmd *cmdConfig, - connectionType ConnectionType, - syncConfig *tabledependency.RunConfig, -) 
*benthosConfigResponse { - schema, table := sqlmanager_shared.SplitTableKey(syncConfig.Table()) - - var jobId, jobRunId *string - if cmd.Source.ConnectionOpts != nil { - jobRunId = cmd.Source.ConnectionOpts.JobRunId - jobId = cmd.Source.ConnectionOpts.JobId - } - - bc := &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Logger: &neosync_benthos.LoggerConfig{ - Level: "ERROR", - AddTimestamp: true, - }, - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - NeosyncConnectionData: &neosync_benthos.NeosyncConnectionData{ - ConnectionId: cmd.Source.ConnectionId, - ConnectionType: string(connectionType), - JobId: jobId, - JobRunId: jobRunId, - Schema: schema, - Table: table, - }, - }, - }, - Pipeline: &neosync_benthos.PipelineConfig{}, - Output: &neosync_benthos.OutputConfig{}, - }, - } - - if syncConfig.RunType() == tabledependency.RunTypeUpdate { - args := syncConfig.InsertColumns() - args = append(args, syncConfig.PrimaryKeys()...) - bc.Output = &neosync_benthos.OutputConfig{ - Outputs: neosync_benthos.Outputs{ - PooledSqlUpdate: &neosync_benthos.PooledSqlUpdate{ - Driver: string(cmd.Destination.Driver), - Dsn: cmd.Destination.ConnectionUrl, - - Schema: schema, - Table: table, - Columns: syncConfig.InsertColumns(), - WhereColumns: syncConfig.PrimaryKeys(), - ArgsMapping: buildPlainInsertArgs(args), - - Batching: &neosync_benthos.Batching{ - Period: "5s", - Count: 100, - }, - }, - }, - } - } else { - bc.Output = &neosync_benthos.OutputConfig{ - Outputs: neosync_benthos.Outputs{ - PooledSqlInsert: &neosync_benthos.PooledSqlInsert{ - Driver: string(cmd.Destination.Driver), - Dsn: cmd.Destination.ConnectionUrl, - - Schema: schema, - Table: table, - Columns: syncConfig.SelectColumns(), - OnConflictDoNothing: cmd.Destination.OnConflict.DoNothing, - ArgsMapping: buildPlainInsertArgs(syncConfig.SelectColumns()), - - Batching: &neosync_benthos.Batching{ - Period: "5s", - Count: 100, - }, - }, - }, - } - } - - return &benthosConfigResponse{ - Name: fmt.Sprintf("%s.%s", syncConfig.Table(), syncConfig.RunType()), - Config: bc, - DependsOn: syncConfig.DependsOn(), - Table: syncConfig.Table(), - Columns: syncConfig.InsertColumns(), - } -} - type schemaConfig struct { Schemas []*mgmtv1alpha1.DatabaseColumn TableConstraints map[string][]*sql_manager.ForeignConstraint @@ -890,7 +874,22 @@ type schemaConfig struct { InitSchemaStatements []*mgmtv1alpha1.SchemaInitStatements } -func (c *clisync) getConnectionSchemaConfig( +func (c *clisync) getConnectionSchemaConfig() (*schemaConfig, error) { + connSchemaCfg, err := c.getConnectionSchemaConfigByConnectionType(c.sourceConnection) + if err != nil { + return nil, err + } + switch conn := c.sourceConnection.GetConnectionConfig().GetConfig().(type) { + case *mgmtv1alpha1.ConnectionConfig_PgConfig, *mgmtv1alpha1.ConnectionConfig_MysqlConfig, *mgmtv1alpha1.ConnectionConfig_DynamodbConfig: + return c.getSourceConnectionSchemaConfig(c.sourceConnection, connSchemaCfg) + case *mgmtv1alpha1.ConnectionConfig_GcpCloudstorageConfig, *mgmtv1alpha1.ConnectionConfig_AwsS3Config: + return c.getDestinationSchemaConfig(c.sourceConnection, connSchemaCfg) + default: + return nil, fmt.Errorf("unable to build connection schema config: unsupported connection type (%T)", conn) + } +} + +func (c *clisync) getSourceConnectionSchemaConfig( connection *mgmtv1alpha1.Connection, sc *mgmtv1alpha1.ConnectionSchemaConfig, ) (*schemaConfig, error) { @@ -967,25 +966,47 @@ func (c *clisync) getConnectionSchemaConfig( } func (c *clisync) 
getDestinationSchemaConfig( - connection *mgmtv1alpha1.Connection, + sourceConnection *mgmtv1alpha1.Connection, sc *mgmtv1alpha1.ConnectionSchemaConfig, ) (*schemaConfig, error) { schemaResp, err := c.connectiondataclient.GetConnectionSchema(c.ctx, connect.NewRequest(&mgmtv1alpha1.GetConnectionSchemaRequest{ - ConnectionId: connection.Id, + ConnectionId: sourceConnection.Id, SchemaConfig: sc, })) if err != nil { return nil, fmt.Errorf("unable to retrieve connection schema for connection: %w", err) } + sourceSchemas := schemaResp.Msg.GetSchemas() - tableColMap := getTableColMap(schemaResp.Msg.GetSchemas()) + destSchemas, err := c.getDestinationSchemas() + if err != nil { + return nil, fmt.Errorf("unable to retrieve destination connection schema for connection: %w", err) + } + + tableColMap := getTableColMap(sourceSchemas) if len(tableColMap) == 0 { c.logger.Warn("no tables found after retrieving connection schema.") return &schemaConfig{}, nil } + hydratedSchemas := sourceSchemas + if len(destSchemas) != 0 { + hydratedSchemas = []*mgmtv1alpha1.DatabaseColumn{} + destColMap := map[string]*mgmtv1alpha1.DatabaseColumn{} + for _, col := range destSchemas { + destColMap[fmt.Sprintf("%s.%s.%s", col.Schema, col.Table, col.Column)] = col + } + for _, col := range sourceSchemas { + destCol, ok := destColMap[fmt.Sprintf("%s.%s.%s", col.Schema, col.Table, col.Column)] + if ok { + col = destCol + } + hydratedSchemas = append(hydratedSchemas, col) + } + } + schemaMap := map[string]struct{}{} - for _, s := range schemaResp.Msg.GetSchemas() { + for _, s := range sourceSchemas { schemaMap[s.Schema] = struct{}{} } schemas := []string{} @@ -996,7 +1017,7 @@ func (c *clisync) getDestinationSchemaConfig( c.logger.Info(fmt.Sprintf("Building table constraints for %d schemas...", len(schemas))) tableConstraints, err := c.getDestinationTableConstraints(schemas) if err != nil { - return nil, fmt.Errorf("unable to build destination tablle constraints: %w", err) + return nil, fmt.Errorf("unable to build destination table constraints: %w", err) } primaryKeys := map[string]*mgmtv1alpha1.PrimaryConstraint{} @@ -1033,7 +1054,7 @@ func (c *clisync) getDestinationSchemaConfig( } return &schemaConfig{ - Schemas: schemaResp.Msg.GetSchemas(), + Schemas: hydratedSchemas, TableConstraints: tableConstraints.ForeignKeyConstraints, TablePrimaryKeys: primaryKeys, TruncateTableStatementsMap: truncateTableStatementsMap, @@ -1056,3 +1077,39 @@ func (c *clisync) getDestinationTableConstraints(schemas []string) (*sql_manager return constraints, nil } + +func (c *clisync) getDestinationSchemas() ([]*mgmtv1alpha1.DatabaseColumn, error) { + cctx, cancel := context.WithDeadline(c.ctx, time.Now().Add(5*time.Second)) + defer cancel() + db, err := c.sqlmanagerclient.NewSqlDbFromUrl(cctx, string(c.cmd.Destination.Driver), c.cmd.Destination.ConnectionUrl) + if err != nil { + return nil, err + } + defer db.Db.Close() + + dbschema, err := db.Db.GetDatabaseSchema(cctx) + if err != nil { + return nil, err + } + schemas := []*mgmtv1alpha1.DatabaseColumn{} + for _, col := range dbschema { + col := col + var defaultColumn *string + if col.ColumnDefault != "" { + defaultColumn = &col.ColumnDefault + } + + schemas = append(schemas, &mgmtv1alpha1.DatabaseColumn{ + Schema: col.TableSchema, + Table: col.TableName, + Column: col.ColumnName, + DataType: col.DataType, + IsNullable: col.IsNullable, + ColumnDefault: defaultColumn, + GeneratedType: col.GeneratedType, + IdentityGeneration: col.IdentityGeneration, + }) + } + + return schemas, nil +} diff --git 
a/cli/internal/cmds/neosync/sync/sync_integration_test.go b/cli/internal/cmds/neosync/sync/sync_integration_test.go index a95b0b4f7c..888f6aa6da 100644 --- a/cli/internal/cmds/neosync/sync/sync_integration_test.go +++ b/cli/internal/cmds/neosync/sync/sync_integration_test.go @@ -78,6 +78,17 @@ func Test_Sync(t *testing.T) { } err := sync.configureAndRunSync() require.NoError(t, err) + + rows := postgres.Target.DB.QueryRow(ctx, "select count(*) from humanresources.employees;") + var rowCount int + err = rows.Scan(&rowCount) + require.NoError(t, err) + require.Greater(t, rowCount, 1) + + rows = postgres.Target.DB.QueryRow(ctx, "select count(*) from humanresources.generated_table;") + err = rows.Scan(&rowCount) + require.NoError(t, err) + require.Greater(t, rowCount, 1) }) t.Cleanup(func() { @@ -130,6 +141,17 @@ func Test_Sync(t *testing.T) { } err := sync.configureAndRunSync() require.NoError(t, err) + + rows := mysql.Target.DB.QueryRowContext(ctx, "select count(*) from humanresources.locations;") + var rowCount int + err = rows.Scan(&rowCount) + require.NoError(t, err) + require.Greater(t, rowCount, 1) + + rows = mysql.Target.DB.QueryRowContext(ctx, "select count(*) from humanresources.generated_table;") + err = rows.Scan(&rowCount) + require.NoError(t, err) + require.Greater(t, rowCount, 1) }) t.Cleanup(func() { diff --git a/cli/internal/cmds/neosync/sync/sync_test.go b/cli/internal/cmds/neosync/sync/sync_test.go deleted file mode 100644 index a7163ec4be..0000000000 --- a/cli/internal/cmds/neosync/sync/sync_test.go +++ /dev/null @@ -1,142 +0,0 @@ -package sync_cmd - -import ( - "io" - "log/slog" - "testing" - - tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" - "github.com/stretchr/testify/require" -) - -func Test_groupConfigsByDependency(t *testing.T) { - tests := []struct { - name string - configs []*benthosConfigResponse - expect [][]*benthosConfigResponse - }{ - { - name: "No dependencies", - configs: []*benthosConfigResponse{ - {Name: "public.users", DependsOn: []*tabledependency.DependsOn{}, Table: "public.users", Columns: []string{"id", "email"}}, - {Name: "public.accounts", DependsOn: []*tabledependency.DependsOn{}, Table: "public.accounts", Columns: []string{"id", "name"}}, - }, - expect: [][]*benthosConfigResponse{ - { - {Name: "public.users", DependsOn: []*tabledependency.DependsOn{}, Table: "public.users", Columns: []string{"id", "email"}}, - {Name: "public.accounts", DependsOn: []*tabledependency.DependsOn{}, Table: "public.accounts", Columns: []string{"id", "name"}}, - }, - }, - }, - { - name: "Multiple dependencies", - configs: []*benthosConfigResponse{ - {Name: "public.users", DependsOn: []*tabledependency.DependsOn{}, Table: "public.users", Columns: []string{"id", "email"}}, - {Name: "public.accounts", DependsOn: []*tabledependency.DependsOn{}, Table: "public.accounts", Columns: []string{"id", "name"}}, - {Name: "public.jobs", DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}}, Table: "public.jobs", Columns: []string{"id", "user_id"}}, - {Name: "public.regions", DependsOn: []*tabledependency.DependsOn{{Table: "public.accounts", Columns: []string{"id"}}}, Table: "public.regions", Columns: []string{"id", "account_id"}}, - {Name: "public.tasks", DependsOn: []*tabledependency.DependsOn{{Table: "public.jobs", Columns: []string{"id"}}}, Table: "public.tasks", Columns: []string{"id", "job_id"}}, - }, - expect: [][]*benthosConfigResponse{ - { - {Name: "public.users", DependsOn: []*tabledependency.DependsOn{}, 
Table: "public.users", Columns: []string{"id", "email"}}, - {Name: "public.accounts", DependsOn: []*tabledependency.DependsOn{}, Table: "public.accounts", Columns: []string{"id", "name"}}, - }, - { - {Name: "public.jobs", DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}}, Table: "public.jobs", Columns: []string{"id", "user_id"}}, - {Name: "public.regions", DependsOn: []*tabledependency.DependsOn{{Table: "public.accounts", Columns: []string{"id"}}}, Table: "public.regions", Columns: []string{"id", "account_id"}}, - }, - { - {Name: "public.tasks", DependsOn: []*tabledependency.DependsOn{{Table: "public.jobs", Columns: []string{"id"}}}, Table: "public.tasks", Columns: []string{"id", "job_id"}}, - }, - }, - }, - { - name: "Simple dependencies", - configs: []*benthosConfigResponse{ - {Name: "public.users", DependsOn: []*tabledependency.DependsOn{}, Table: "public.users", Columns: []string{"id", "email"}}, - {Name: "public.accounts", DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}}, Table: "public.accounts", Columns: []string{"id", "user_id"}}, - {Name: "public.jobs", DependsOn: []*tabledependency.DependsOn{{Table: "public.accounts", Columns: []string{"id"}}}, Table: "public.jobs", Columns: []string{"id", "account_id"}}, - {Name: "public.regions", DependsOn: []*tabledependency.DependsOn{{Table: "public.jobs", Columns: []string{"id"}}}, Table: "public.regions", Columns: []string{"id", "job_id"}}, - {Name: "public.tasks", DependsOn: []*tabledependency.DependsOn{{Table: "public.regions", Columns: []string{"id"}}}, Table: "public.tasks", Columns: []string{"id", "region_id"}}, - }, - expect: [][]*benthosConfigResponse{ - { - {Name: "public.users", DependsOn: []*tabledependency.DependsOn{}, Table: "public.users", Columns: []string{"id", "email"}}, - }, - { - {Name: "public.accounts", DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}}, Table: "public.accounts", Columns: []string{"id", "user_id"}}, - }, - { - {Name: "public.jobs", DependsOn: []*tabledependency.DependsOn{{Table: "public.accounts", Columns: []string{"id"}}}, Table: "public.jobs", Columns: []string{"id", "account_id"}}, - }, - { - {Name: "public.regions", DependsOn: []*tabledependency.DependsOn{{Table: "public.jobs", Columns: []string{"id"}}}, Table: "public.regions", Columns: []string{"id", "job_id"}}, - }, - { - {Name: "public.tasks", DependsOn: []*tabledependency.DependsOn{{Table: "public.regions", Columns: []string{"id"}}}, Table: "public.tasks", Columns: []string{"id", "region_id"}}, - }, - }, - }, - { - name: "Circular dependencies", - configs: []*benthosConfigResponse{ - {Name: "public.a", DependsOn: []*tabledependency.DependsOn{}, Table: "public.a", Columns: []string{"id"}}, - {Name: "public.b", DependsOn: []*tabledependency.DependsOn{{Table: "public.c", Columns: []string{"id"}}}, Table: "public.b", Columns: []string{"id", "c_id"}}, - {Name: "public.c", DependsOn: []*tabledependency.DependsOn{{Table: "public.a", Columns: []string{"id"}}}, Table: "public.c", Columns: []string{"id", "a_id"}}, - {Name: "public.a.update", DependsOn: []*tabledependency.DependsOn{{Table: "public.b", Columns: []string{"id"}}}, Table: "public.a", Columns: []string{"b_id"}}, - }, - expect: [][]*benthosConfigResponse{ - { - {Name: "public.a", DependsOn: []*tabledependency.DependsOn{}, Table: "public.a", Columns: []string{"id"}}, - }, - { - {Name: "public.c", DependsOn: []*tabledependency.DependsOn{{Table: "public.a", Columns: 
[]string{"id"}}}, Table: "public.c", Columns: []string{"id", "a_id"}}, - }, - { - {Name: "public.b", DependsOn: []*tabledependency.DependsOn{{Table: "public.c", Columns: []string{"id"}}}, Table: "public.b", Columns: []string{"id", "c_id"}}, - }, - { - {Name: "public.a.update", DependsOn: []*tabledependency.DependsOn{{Table: "public.b", Columns: []string{"id"}}}, Table: "public.a", Columns: []string{"b_id"}}, - }, - }, - }, - } - - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - groups := groupConfigsByDependency(tt.configs, logger) - require.Len(t, groups, len(tt.expect)) - for i, group := range groups { - require.Equal(t, len(group), len(tt.expect[i])) - expectedConfigMap := map[string]*benthosConfigResponse{} - for _, cfg := range tt.expect[i] { - expectedConfigMap[cfg.Name] = cfg - } - for _, cfg := range group { - expect := expectedConfigMap[cfg.Name] - require.NotNil(t, expect) - require.ElementsMatch(t, cfg.DependsOn, expect.DependsOn) - } - } - }) - } -} - -func Test_groupConfigsByDependency_Error(t *testing.T) { - configs := []*benthosConfigResponse{ - {Name: "public.a", DependsOn: []*tabledependency.DependsOn{{Table: "public.b", Columns: []string{"id"}}}, Table: "public.a", Columns: []string{"id"}}, - {Name: "public.b", DependsOn: []*tabledependency.DependsOn{{Table: "public.c", Columns: []string{"id"}}}, Table: "public.b", Columns: []string{"id", "c_id"}}, - {Name: "public.c", DependsOn: []*tabledependency.DependsOn{{Table: "public.a", Columns: []string{"id"}}}, Table: "public.c", Columns: []string{"id", "a_id"}}, - } - groups := groupConfigsByDependency(configs, slog.New(slog.NewTextHandler(io.Discard, nil))) - require.Nil(t, groups) -} - -func Test_buildPlainInsertArgs(t *testing.T) { - require.Empty(t, buildPlainInsertArgs(nil)) - require.Empty(t, buildPlainInsertArgs([]string{})) - require.Equal(t, buildPlainInsertArgs([]string{"foo", "bar", "baz"}), `root = [this."foo", this."bar", this."baz"]`) -} diff --git a/cli/internal/cmds/neosync/sync/ui.go b/cli/internal/cmds/neosync/sync/ui.go index 2bbe28772b..261c65c62b 100644 --- a/cli/internal/cmds/neosync/sync/ui.go +++ b/cli/internal/cmds/neosync/sync/ui.go @@ -12,6 +12,7 @@ import ( "golang.org/x/sync/errgroup" "github.com/nucleuscloud/neosync/cli/internal/output" + benthosbuilder "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder" _ "github.com/nucleuscloud/neosync/worker/pkg/benthos/sql" _ "github.com/warpstreamlabs/bento/public/components/aws" _ "github.com/warpstreamlabs/bento/public/components/io" @@ -28,7 +29,7 @@ type model struct { ctx context.Context logger *slog.Logger benv *service.Environment - groupedConfigs [][]*benthosConfigResponse + groupedConfigs [][]*benthosbuilder.BenthosConfigResponse tableSynced int index int width int @@ -50,7 +51,7 @@ var ( durationStyle = dotStyle ) -func newModel(ctx context.Context, benv *service.Environment, groupedConfigs [][]*benthosConfigResponse, logger *slog.Logger, outputType output.OutputType) *model { +func newModel(ctx context.Context, benv *service.Environment, groupedConfigs [][]*benthosbuilder.BenthosConfigResponse, logger *slog.Logger, outputType output.OutputType) *model { s := spinner.New() s.Style = lipgloss.NewStyle().Foreground(lipgloss.Color("63")) return &model{ @@ -136,7 +137,7 @@ func (m *model) View() string { type syncedDataMsg map[string]string -func (m *model) syncConfigs(ctx context.Context, configs []*benthosConfigResponse) tea.Cmd { +func (m *model) 
syncConfigs(ctx context.Context, configs []*benthosbuilder.BenthosConfigResponse) tea.Cmd { return func() tea.Msg { messageMap := syncmap.Map{} errgrp, errctx := errgroup.WithContext(ctx) @@ -179,7 +180,7 @@ func (m *model) syncConfigs(ctx context.Context, configs []*benthosConfigRespons } } -func getConfigCount(groupedConfigs [][]*benthosConfigResponse) int { +func getConfigCount(groupedConfigs [][]*benthosbuilder.BenthosConfigResponse) int { count := 0 for _, group := range groupedConfigs { for _, config := range group { @@ -191,7 +192,7 @@ func getConfigCount(groupedConfigs [][]*benthosConfigResponse) int { return count } -func runSync(ctx context.Context, outputType output.OutputType, benv *service.Environment, groupedConfigs [][]*benthosConfigResponse, logger *slog.Logger) error { +func runSync(ctx context.Context, outputType output.OutputType, benv *service.Environment, groupedConfigs [][]*benthosbuilder.BenthosConfigResponse, logger *slog.Logger) error { var opts []tea.ProgramOption var synclogger = logger if outputType == output.PlainOutput { diff --git a/cli/internal/cmds/neosync/sync/util.go b/cli/internal/cmds/neosync/sync/util.go index ba8da54a29..0b27f5de67 100644 --- a/cli/internal/cmds/neosync/sync/util.go +++ b/cli/internal/cmds/neosync/sync/util.go @@ -10,19 +10,9 @@ import ( mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" sql_manager "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + benthosbuilder "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder" ) -func buildPlainInsertArgs(cols []string) string { - if len(cols) == 0 { - return "" - } - pieces := make([]string, len(cols)) - for idx := range cols { - pieces[idx] = fmt.Sprintf("this.%q", cols[idx]) - } - return fmt.Sprintf("root = [%s]", strings.Join(pieces, ", ")) -} - func maxInt(a, b int) int { if a > b { return a @@ -54,7 +44,7 @@ func getConnectionType(connection *mgmtv1alpha1.Connection) (ConnectionType, err return "", errors.New("unsupported connection type") } -func isConfigReady(config *benthosConfigResponse, queuedMap map[string][]string) bool { +func isConfigReady(config *benthosbuilder.BenthosConfigResponse, queuedMap map[string][]string) bool { for _, dep := range config.DependsOn { if cols, ok := queuedMap[dep.Table]; ok { for _, dc := range dep.Columns { @@ -69,17 +59,18 @@ func isConfigReady(config *benthosConfigResponse, queuedMap map[string][]string) return true } -func groupConfigsByDependency(configs []*benthosConfigResponse, logger *slog.Logger) [][]*benthosConfigResponse { - groupedConfigs := [][]*benthosConfigResponse{} - configMap := map[string]*benthosConfigResponse{} +func groupConfigsByDependency(configs []*benthosbuilder.BenthosConfigResponse, logger *slog.Logger) [][]*benthosbuilder.BenthosConfigResponse { + groupedConfigs := [][]*benthosbuilder.BenthosConfigResponse{} + configMap := map[string]*benthosbuilder.BenthosConfigResponse{} queuedMap := map[string][]string{} // map -> table to cols // get root configs - rootConfigs := []*benthosConfigResponse{} + rootConfigs := []*benthosbuilder.BenthosConfigResponse{} for _, c := range configs { if len(c.DependsOn) == 0 { + table := fmt.Sprintf("%s.%s", c.TableSchema, c.TableName) rootConfigs = append(rootConfigs, c) - queuedMap[c.Table] = c.Columns + queuedMap[table] = c.Columns } else { configMap[c.Name] = c } @@ -98,7 +89,7 @@ func groupConfigsByDependency(configs []*benthosConfigResponse, logger 
*slog.Log return nil } prevTableLen = len(configMap) - dependentConfigs := []*benthosConfigResponse{} + dependentConfigs := []*benthosbuilder.BenthosConfigResponse{} for _, c := range configMap { if isConfigReady(c, queuedMap) { dependentConfigs = append(dependentConfigs, c) @@ -108,7 +99,8 @@ func groupConfigsByDependency(configs []*benthosConfigResponse, logger *slog.Log if len(dependentConfigs) > 0 { groupedConfigs = append(groupedConfigs, dependentConfigs) for _, c := range dependentConfigs { - queuedMap[c.Table] = append(queuedMap[c.Table], c.Columns...) + table := fmt.Sprintf("%s.%s", c.TableSchema, c.TableName) + queuedMap[table] = append(queuedMap[table], c.Columns...) } } } diff --git a/go.mod b/go.mod index 8865ba68a3..3cf6ebe1cf 100644 --- a/go.mod +++ b/go.mod @@ -80,7 +80,6 @@ require ( go.temporal.io/api v1.40.0 go.temporal.io/sdk v1.29.1 go.temporal.io/sdk/contrib/opentelemetry v0.6.0 - go.uber.org/atomic v1.11.0 golang.org/x/crypto v0.28.0 golang.org/x/net v0.30.0 golang.org/x/sync v0.8.0 @@ -416,6 +415,7 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect + go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect golang.org/x/mod v0.21.0 // indirect diff --git a/internal/benthos/benthos-builder/benthos-builder.go b/internal/benthos/benthos-builder/benthos-builder.go new file mode 100644 index 0000000000..c3b30a6a24 --- /dev/null +++ b/internal/benthos/benthos-builder/benthos-builder.go @@ -0,0 +1,324 @@ +package benthosbuilder + +import ( + "fmt" + "log/slog" + + mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" + "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager" + sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_conns "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/builders" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" + neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" + "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" +) + +// BenthosConfigResponse represents a complete Benthos data pipeline configuration for a specific table. +type BenthosConfigResponse struct { + Name string + DependsOn []*tabledependency.DependsOn + + // TODO refactor these out + Config *neosync_benthos.BenthosConfig + TableSchema string + TableName string + Columns []string + RunType tabledependency.RunType + ColumnDefaultProperties map[string]*neosync_benthos.ColumnDefaultProperties + RedisDependsOn map[string][]string + BenthosDsns []*bb_shared.BenthosDsn + RedisConfig []*bb_shared.BenthosRedisConfig +} + +// Combines a connection type and job type to uniquely identify a builder configuration +type BuilderKey struct { + ConnType bb_internal.ConnectionType + JobType bb_internal.JobType +} + +func (b *BuilderKey) String() string { + return fmt.Sprintf("%s.%s", b.JobType, b.ConnType) +} + +// Manages and provides access to different Benthos builders based on connection and job types +type BuilderProvider struct
{ + builders map[string]bb_internal.BenthosBuilder + logger *slog.Logger +} + +// Creates a new BuilderProvider for managing builders +func NewBuilderProvider(logger *slog.Logger) *BuilderProvider { + r := &BuilderProvider{ + builders: make(map[string]bb_internal.BenthosBuilder), + logger: logger, + } + return r +} + +// Handles registering new builders +func (r *BuilderProvider) Register(jobType bb_internal.JobType, connType bb_internal.ConnectionType, builder bb_internal.BenthosBuilder) { + key := BuilderKey{ConnType: connType, JobType: jobType} + _, exists := r.builders[key.String()] + if !exists { + r.logger.Debug(fmt.Sprintf("registering benthos builder for job type %s and connection type %s", jobType, connType)) + r.builders[key.String()] = builder + } +} + +// Handles getting a builder based on job and connection type +func (r *BuilderProvider) GetBuilder( + job *mgmtv1alpha1.Job, + connection *mgmtv1alpha1.Connection, +) (bb_internal.BenthosBuilder, error) { + connectionType := bb_internal.GetConnectionType(connection) + jobType := bb_internal.GetJobType(job) + key := BuilderKey{ConnType: connectionType, JobType: jobType} + builder, exists := r.builders[key.String()] + if !exists { + return nil, fmt.Errorf("unsupported connection type: %s", connectionType) + } + return builder, nil +} + +// Registers the standard builders for the given job and connection types +func (b *BuilderProvider) registerStandardBuilders( + job *mgmtv1alpha1.Job, + sourceConnection *mgmtv1alpha1.Connection, + destinationConnections []*mgmtv1alpha1.Connection, + sqlmanagerclient sqlmanager.SqlManagerClient, + transformerclient mgmtv1alpha1connect.TransformersServiceClient, + connectionclient mgmtv1alpha1connect.ConnectionServiceClient, + redisConfig *shared.RedisConfig, + postgresDriverOverride *string, + selectQueryBuilder bb_shared.SelectQueryMapBuilder, +) error { + sourceConnectionType := bb_internal.GetConnectionType(sourceConnection) + jobType := bb_internal.GetJobType(job) + connectionTypes := []bb_internal.ConnectionType{sourceConnectionType} + for _, dest := range destinationConnections { + connectionTypes = append(connectionTypes, bb_internal.GetConnectionType(dest)) + } + + if jobType == bb_internal.JobTypeSync { + for _, connectionType := range connectionTypes { + switch connectionType { + case bb_internal.ConnectionTypePostgres: + driver := sqlmanager_shared.PostgresDriver + if postgresDriverOverride != nil && *postgresDriverOverride != "" { + driver = *postgresDriverOverride + } + sqlbuilder := bb_conns.NewSqlSyncBuilder(transformerclient, sqlmanagerclient, redisConfig, driver, selectQueryBuilder) + b.Register(bb_internal.JobTypeSync, connectionType, sqlbuilder) + case bb_internal.ConnectionTypeMysql: + sqlbuilder := bb_conns.NewSqlSyncBuilder(transformerclient, sqlmanagerclient, redisConfig, sqlmanager_shared.MysqlDriver, selectQueryBuilder) + b.Register(bb_internal.JobTypeSync, connectionType, sqlbuilder) + case bb_internal.ConnectionTypeMssql: + sqlbuilder := bb_conns.NewSqlSyncBuilder(transformerclient, sqlmanagerclient, redisConfig, sqlmanager_shared.MssqlDriver, selectQueryBuilder) + b.Register(bb_internal.JobTypeSync, connectionType, sqlbuilder) + case bb_internal.ConnectionTypeAwsS3: + b.Register(bb_internal.JobTypeSync, bb_internal.ConnectionTypeAwsS3, bb_conns.NewAwsS3SyncBuilder()) + case bb_internal.ConnectionTypeDynamodb: + b.Register(bb_internal.JobTypeSync, bb_internal.ConnectionTypeDynamodb, bb_conns.NewDynamoDbSyncBuilder(transformerclient)) + case bb_internal.ConnectionTypeMongo:
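+ // like DynamoDB above, MongoDB needs only the transformer client; there is no SQL driver to resolve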
b.Register(bb_internal.JobTypeSync, bb_internal.ConnectionTypeMongo, bb_conns.NewMongoDbSyncBuilder(transformerclient))
+			case bb_internal.ConnectionTypeGCP:
+				b.Register(bb_internal.JobTypeSync, bb_internal.ConnectionTypeGCP, bb_conns.NewGcpCloudStorageSyncBuilder())
+			default:
+				return fmt.Errorf("unsupported connection type for sync job: %s", connectionType)
+			}
+		}
+	}
+
+	if jobType == bb_internal.JobTypeAIGenerate {
+		if len(destinationConnections) != 1 {
+			return fmt.Errorf("unsupported destination count for AI generate job: %d", len(destinationConnections))
+		}
+		destConnType := bb_internal.GetConnectionType(destinationConnections[0])
+		driver, err := bb_internal.GetSqlDriverByConnectionType(destConnType)
+		if err != nil {
+			return err
+		}
+		builder := bb_conns.NewGenerateAIBuilder(transformerclient, sqlmanagerclient, connectionclient, driver)
+		b.Register(bb_internal.JobTypeAIGenerate, bb_internal.ConnectionTypeOpenAI, builder)
+		b.Register(bb_internal.JobTypeAIGenerate, destConnType, builder)
+	}
+	if jobType == bb_internal.JobTypeGenerate {
+		for _, connectionType := range connectionTypes {
+			driver, err := bb_internal.GetSqlDriverByConnectionType(connectionType)
+			if err != nil {
+				return err
+			}
+			b.Register(bb_internal.JobTypeGenerate, connectionType, bb_conns.NewGenerateBuilder(transformerclient, sqlmanagerclient, connectionclient, driver))
+		}
+	}
+	return nil
+}
+
+// Adds builder logger tags
+func withBenthosConfigLoggerTags(
+	job *mgmtv1alpha1.Job,
+	sourceConnection *mgmtv1alpha1.Connection,
+) []any {
+	keyvals := []any{}
+
+	sourceConnectionType := bb_internal.GetConnectionType(sourceConnection)
+	jobType := bb_internal.GetJobType(job)
+
+	if sourceConnectionType != "" {
+		keyvals = append(keyvals, "sourceConnectionType", sourceConnectionType)
+	}
+	if jobType != "" {
+		keyvals = append(keyvals, "jobType", jobType)
+	}
+
+	return keyvals
+}
+
+// Manages the creation of Benthos configurations
+type BenthosConfigManager struct {
+	sourceProvider         *BuilderProvider
+	destinationProvider    *BuilderProvider
+	metricsEnabled         bool
+	metricLabelKeyVals     map[string]string
+	logger                 *slog.Logger
+	job                    *mgmtv1alpha1.Job
+	sourceConnection       *mgmtv1alpha1.Connection
+	destinationConnections []*mgmtv1alpha1.Connection
+	runId                  string
+}
+
+// Manages all necessary configuration parameters for creating
+// a worker-based Benthos configuration manager
+type WorkerBenthosConfig struct {
+	Job                    *mgmtv1alpha1.Job
+	SourceConnection       *mgmtv1alpha1.Connection
+	DestinationConnections []*mgmtv1alpha1.Connection
+	RunId                  string
+	MetricLabelKeyVals     map[string]string
+	Logger                 *slog.Logger
+	Sqlmanagerclient       sqlmanager.SqlManagerClient
+	Transformerclient      mgmtv1alpha1connect.TransformersServiceClient
+	Connectionclient       mgmtv1alpha1connect.ConnectionServiceClient
+	RedisConfig            *shared.RedisConfig
+	MetricsEnabled         bool
+	SelectQueryBuilder     bb_shared.SelectQueryMapBuilder
+}
+
+// Creates a new BenthosConfigManager configured for the worker
+func NewWorkerBenthosConfigManager(
+	config *WorkerBenthosConfig,
+) (*BenthosConfigManager, error) {
+	provider := NewBuilderProvider(config.Logger)
+	err := provider.registerStandardBuilders(
+		config.Job,
+		config.SourceConnection,
+		config.DestinationConnections,
+		config.Sqlmanagerclient,
+		config.Transformerclient,
+		config.Connectionclient,
+		config.RedisConfig,
+		nil,
+		config.SelectQueryBuilder,
+	)
+	if err != nil {
+		return nil, err
+	}
+	logger := config.Logger.With(withBenthosConfigLoggerTags(config.Job, config.SourceConnection)...)
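+	// The worker path registers a single provider and uses it for both the source and destination sides.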
+ return &BenthosConfigManager{ + sourceProvider: provider, + destinationProvider: provider, + metricsEnabled: config.MetricsEnabled, + metricLabelKeyVals: config.MetricLabelKeyVals, + logger: logger, + job: config.Job, + sourceConnection: config.SourceConnection, + destinationConnections: config.DestinationConnections, + runId: config.RunId, + }, nil +} + +// Manages all necessary configuration parameters for creating +// a CLI-based Benthos configuration manager +type CliBenthosConfig struct { + Job *mgmtv1alpha1.Job + SourceConnection *mgmtv1alpha1.Connection + DestinationConnection *mgmtv1alpha1.Connection + SourceJobRunId *string // for use when AWS S3 is the source + PostgresDriverOverride *string // optional driver override. used for postgres + SyncConfigs []*tabledependency.RunConfig + RunId string + MetricLabelKeyVals map[string]string + Logger *slog.Logger + Sqlmanagerclient sqlmanager.SqlManagerClient + Transformerclient mgmtv1alpha1connect.TransformersServiceClient + Connectiondataclient mgmtv1alpha1connect.ConnectionDataServiceClient + RedisConfig *shared.RedisConfig + MetricsEnabled bool +} + +// Creates a new BenthosConfigManager configured for CLI +func NewCliBenthosConfigManager( + config *CliBenthosConfig, +) (*BenthosConfigManager, error) { + destinationProvider := NewBuilderProvider(config.Logger) + err := destinationProvider.registerStandardBuilders( + config.Job, + config.SourceConnection, + []*mgmtv1alpha1.Connection{config.DestinationConnection}, + config.Sqlmanagerclient, + config.Transformerclient, + nil, + config.RedisConfig, + config.PostgresDriverOverride, + nil, + ) + if err != nil { + return nil, err + } + + sourceProvider := NewCliSourceBuilderProvider(config) + + logger := config.Logger.With(withBenthosConfigLoggerTags(config.Job, config.SourceConnection)...) 
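+	// Unlike the worker, the CLI reads source data through the Neosync connection-data API,
+	// so it pairs a dedicated source provider with the standard destination provider.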
+ return &BenthosConfigManager{ + sourceProvider: sourceProvider, + destinationProvider: destinationProvider, + metricsEnabled: config.MetricsEnabled, + logger: logger, + job: config.Job, + sourceConnection: config.SourceConnection, + destinationConnections: []*mgmtv1alpha1.Connection{config.DestinationConnection}, + runId: config.RunId, + }, nil +} + +// NewCliSourceBuilderProvider creates a specialized provider for CLI source operations +func NewCliSourceBuilderProvider( + config *CliBenthosConfig, +) *BuilderProvider { + provider := NewBuilderProvider(config.Logger) + + sourceConnectionType := bb_internal.GetConnectionType(config.SourceConnection) + jobType := bb_internal.GetJobType(config.Job) + + builder := bb_conns.NewNeosyncConnectionDataSyncBuilder( + config.Connectiondataclient, + config.Sqlmanagerclient, + config.SourceJobRunId, + config.SyncConfigs, + config.DestinationConnection, + sourceConnectionType, + ) + + if jobType == bb_internal.JobTypeSync { + switch sourceConnectionType { + case bb_internal.ConnectionTypePostgres, bb_internal.ConnectionTypeMysql, + bb_internal.ConnectionTypeMssql, bb_internal.ConnectionTypeAwsS3, bb_internal.ConnectionTypeDynamodb: + provider.Register(bb_internal.JobTypeSync, sourceConnectionType, builder) + } + } + + return provider +} diff --git a/internal/benthos/benthos-builder/builders/aws-s3.go b/internal/benthos/benthos-builder/builders/aws-s3.go new file mode 100644 index 0000000000..2548282160 --- /dev/null +++ b/internal/benthos/benthos-builder/builders/aws-s3.go @@ -0,0 +1,179 @@ +package benthosbuilder_builders + +import ( + "context" + "errors" + "fmt" + "strings" + "time" + + mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" +) + +type awsS3SyncBuilder struct { +} + +func NewAwsS3SyncBuilder() bb_internal.BenthosBuilder { + return &awsS3SyncBuilder{} +} + +func (b *awsS3SyncBuilder) BuildSourceConfigs(ctx context.Context, params *bb_internal.SourceParams) ([]*bb_internal.BenthosSourceConfig, error) { + return nil, errors.ErrUnsupported +} + +func (b *awsS3SyncBuilder) BuildDestinationConfig(ctx context.Context, params *bb_internal.DestinationParams) (*bb_internal.BenthosDestinationConfig, error) { + config := &bb_internal.BenthosDestinationConfig{} + + benthosConfig := params.SourceConfig + if benthosConfig.RunType == tabledependency.RunTypeUpdate { + return config, nil + } + destinationOpts := params.DestinationOpts.GetAwsS3Options() + connAwsS3Config := params.DestConnection.GetConnectionConfig().GetAwsS3Config() + + if destinationOpts == nil { + return nil, errors.New("destination must have configured AWS S3 options") + } + if connAwsS3Config == nil { + return nil, errors.New("destination must have configured AWS S3 config") + } + + s3pathpieces := []string{} + if connAwsS3Config.PathPrefix != nil && *connAwsS3Config.PathPrefix != "" { + s3pathpieces = append(s3pathpieces, strings.Trim(*connAwsS3Config.PathPrefix, "/")) + } + + s3pathpieces = append( + s3pathpieces, + "workflows", + params.RunId, + "activities", + neosync_benthos.BuildBenthosTable(benthosConfig.TableSchema, benthosConfig.TableName), + "data", + `records-${!count("files")}-${!timestamp_unix_nano()}.jsonl.gz`, + ) + + maxInFlight := 64 + if destinationOpts.GetMaxInFlight() > 0 { + maxInFlight = 
int(destinationOpts.GetMaxInFlight()) + } + + batchCount := 100 + batchPeriod := "5s" + batchConfig := destinationOpts.GetBatch() + if batchConfig != nil { + batchCount = int(batchConfig.GetCount()) + + if batchConfig.GetPeriod() != "" { + _, err := time.ParseDuration(batchConfig.GetPeriod()) + if err != nil { + return nil, fmt.Errorf("unable to parse batch period for s3 destination config: %w", err) + } + } + batchPeriod = batchConfig.GetPeriod() + } + + if batchCount == 0 && batchPeriod == "" { + return nil, fmt.Errorf("must have at least one batch policy configured. Cannot disable both period and count") + } + + timeout := "" + if destinationOpts.GetTimeout() != "" { + _, err := time.ParseDuration(destinationOpts.GetTimeout()) + if err != nil { + return nil, fmt.Errorf("unable to parse timeout for s3 destination config: %w", err) + } + timeout = destinationOpts.GetTimeout() + } + + storageClass := "" + if destinationOpts.GetStorageClass() != mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_UNSPECIFIED { + storageClass = convertToS3StorageClass(destinationOpts.GetStorageClass()).String() + } + + config.Outputs = append(config.Outputs, neosync_benthos.Outputs{ + Fallback: []neosync_benthos.Outputs{ + { + AwsS3: &neosync_benthos.AwsS3Insert{ + Bucket: connAwsS3Config.Bucket, + MaxInFlight: maxInFlight, + Timeout: timeout, + StorageClass: storageClass, + Path: strings.Join(s3pathpieces, "/"), + ContentType: "application/gzip", + Batching: &neosync_benthos.Batching{ + Count: batchCount, + Period: batchPeriod, + Processors: []*neosync_benthos.BatchProcessor{ + {Archive: &neosync_benthos.ArchiveProcessor{Format: "lines"}}, + {Compress: &neosync_benthos.CompressProcessor{Algorithm: "gzip"}}, + }, + }, + Credentials: buildBenthosS3Credentials(connAwsS3Config.Credentials), + Region: connAwsS3Config.GetRegion(), + Endpoint: connAwsS3Config.GetEndpoint(), + }, + }, + // kills activity depending on error + {Error: &neosync_benthos.ErrorOutputConfig{ + ErrorMsg: `${! 
meta("fallback_error")}`, + Batching: &neosync_benthos.Batching{ + Period: batchPeriod, + Count: batchCount, + }, + }}, + }, + }) + + return config, nil +} + +type S3StorageClass int + +const ( + S3StorageClass_UNSPECIFIED S3StorageClass = iota + S3StorageClass_STANDARD + S3StorageClass_REDUCED_REDUNDANCY + S3StorageClass_GLACIER + S3StorageClass_STANDARD_IA + S3StorageClass_ONEZONE_IA + S3StorageClass_INTELLIGENT_TIERING + S3StorageClass_DEEP_ARCHIVE +) + +func (s S3StorageClass) String() string { + return [...]string{ + "STORAGE_CLASS_UNSPECIFIED", + "STANDARD", + "REDUCED_REDUNDANCY", + "GLACIER", + "STANDARD_IA", + "ONEZONE_IA", + "INTELLIGENT_TIERING", + "DEEP_ARCHIVE", + }[s] +} + +func convertToS3StorageClass(protoStorageClass mgmtv1alpha1.AwsS3DestinationConnectionOptions_StorageClass) S3StorageClass { + switch protoStorageClass { + case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_STANDARD: + return S3StorageClass_STANDARD + case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_REDUCED_REDUNDANCY: + return S3StorageClass_REDUCED_REDUNDANCY + case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_GLACIER: + return S3StorageClass_GLACIER + case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_STANDARD_IA: + return S3StorageClass_STANDARD_IA + case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_ONEZONE_IA: + return S3StorageClass_ONEZONE_IA + case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_INTELLIGENT_TIERING: + return S3StorageClass_INTELLIGENT_TIERING + case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_DEEP_ARCHIVE: + return S3StorageClass_DEEP_ARCHIVE + default: + return S3StorageClass_UNSPECIFIED + } +} diff --git a/internal/benthos/benthos-builder/builders/benthos-builder_test.go b/internal/benthos/benthos-builder/builders/benthos-builder_test.go new file mode 100644 index 0000000000..988edc3e5c --- /dev/null +++ b/internal/benthos/benthos-builder/builders/benthos-builder_test.go @@ -0,0 +1,1537 @@ +package benthosbuilder_builders + +import ( + "context" + "fmt" + "log/slog" + "os" + "testing" + + "connectrpc.com/connect" + mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" + sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + "github.com/nucleuscloud/neosync/internal/gotypeutil" + "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/warpstreamlabs/bento/public/bloblang" + + neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" +) + +const ( + mockJobId = "b1767636-3992-4cb4-9bf2-4bb9bddbf43c" + mockWorkflowId = "b1767636-3992-4cb4-9bf2-4bb9bddbf43c-workflowid" + mockRunId = "26444272-0bb0-4325-ae60-17dcd9744785" +) + +var dsn = "dsn" +var driver = sqlmanager_shared.PostgresDriver + +func Test_ProcessorConfigEmpty(t *testing.T) { + mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) + + tableMappings := map[string]*tableMapping{ + "public.users": {Schema: "public", + Table: "users", + Mappings: []*mgmtv1alpha1.JobMapping{ + { + Schema: "public", + Table: "users", + Column: "id", + Transformer: 
&mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, + }, + }, + }, + { + Schema: "public", + Table: "users", + Column: "name", + Transformer: &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, + }, + }, + }, + }, + }} + + groupedSchemas := map[string]map[string]*sqlmanager_shared.ColumnInfo{ + "public.users": { + "id": &sqlmanager_shared.ColumnInfo{ + OrdinalPosition: 1, + ColumnDefault: "324", + IsNullable: false, + DataType: "", + CharacterMaximumLength: nil, + NumericPrecision: nil, + NumericScale: nil, + }, + "name": &sqlmanager_shared.ColumnInfo{ + OrdinalPosition: 1, + ColumnDefault: "324", + IsNullable: false, + DataType: "", + CharacterMaximumLength: nil, + NumericPrecision: nil, + NumericScale: nil, + }, + }, + } + groupedTransformers := map[string]map[string]*mgmtv1alpha1.JobMappingTransformer{ + "public.users": { + "id": &mgmtv1alpha1.JobMappingTransformer{}, + "name": &mgmtv1alpha1.JobMappingTransformer{}, + }, + } + queryMap := map[string]map[tabledependency.RunType]string{ + "public.users": {tabledependency.RunTypeInsert: ""}, + } + runconfigs := []*tabledependency.RunConfig{ + tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{"id"}, nil, []string{"id", "name"}, []string{"id", "name"}, []*tabledependency.DependsOn{}, false), + } + logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) + + res, err := buildBenthosSqlSourceConfigResponses( + logger, + context.Background(), + mockTransformerClient, + tableMappings, + runconfigs, + dsn, + driver, + queryMap, + groupedSchemas, + map[string][]*sqlmanager_shared.ForeignConstraint{}, + groupedTransformers, + mockJobId, + mockRunId, + nil, + nil, + ) + require.Nil(t, err) + require.Empty(t, res[0].Config.StreamConfig.Pipeline.Processors) +} + +func Test_ProcessorConfigEmptyJavascript(t *testing.T) { + mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) + + tableMappings := map[string]*tableMapping{ + "public.users": {Schema: "public", + Table: "users", + Mappings: []*mgmtv1alpha1.JobMapping{ + { + Schema: "public", + Table: "users", + Column: "id", + Transformer: &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{ + PassthroughConfig: &mgmtv1alpha1.Passthrough{}, + }, + }, + }, + }, + { + Schema: "public", + Table: "users", + Column: "name", + Transformer: &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformJavascriptConfig{ + TransformJavascriptConfig: &mgmtv1alpha1.TransformJavascript{Code: ""}, + }, + }, + }, + }, + }, + }} + + groupedSchemas := map[string]map[string]*sqlmanager_shared.ColumnInfo{ + "public.users": { + "id": &sqlmanager_shared.ColumnInfo{ + OrdinalPosition: 1, + ColumnDefault: "324", + IsNullable: false, + DataType: "", + CharacterMaximumLength: nil, + NumericPrecision: nil, + NumericScale: nil, + }, + "name": &sqlmanager_shared.ColumnInfo{ + OrdinalPosition: 1, + ColumnDefault: "324", + IsNullable: false, + DataType: "", + CharacterMaximumLength: nil, + NumericPrecision: nil, + NumericScale: nil, + }, + }, + } + + groupedTransformers := map[string]map[string]*mgmtv1alpha1.JobMappingTransformer{ + "public.users": { + "id": &mgmtv1alpha1.JobMappingTransformer{}, + "name": 
&mgmtv1alpha1.JobMappingTransformer{}, + }, + } + + runconfigs := []*tabledependency.RunConfig{ + tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{"id"}, nil, []string{"id", "name"}, []string{"id", "name"}, []*tabledependency.DependsOn{}, false), + } + + queryMap := map[string]map[tabledependency.RunType]string{ + "public.users": {tabledependency.RunTypeInsert: ""}, + } + logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) + + res, err := buildBenthosSqlSourceConfigResponses( + logger, + context.Background(), + mockTransformerClient, + tableMappings, + runconfigs, + dsn, + driver, + queryMap, + groupedSchemas, + map[string][]*sqlmanager_shared.ForeignConstraint{}, + groupedTransformers, + mockJobId, + mockRunId, + nil, + nil, + ) + require.NoError(t, err) + require.Empty(t, res[0].Config.StreamConfig.Pipeline.Processors) +} + +func TestAreMappingsSubsetOfSchemas(t *testing.T) { + ok := areMappingsSubsetOfSchemas( + map[string]map[string]*sqlmanager_shared.ColumnInfo{ + "public.users": { + "id": &sqlmanager_shared.ColumnInfo{}, + "created_by": &sqlmanager_shared.ColumnInfo{}, + "updated_by": &sqlmanager_shared.ColumnInfo{}, + }, + "neosync_api.accounts": { + "id": &sqlmanager_shared.ColumnInfo{}, + }, + }, + []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id"}, + {Schema: "public", Table: "users", Column: "created_by"}, + }, + ) + require.True(t, ok, "job mappings are a subset of the present database schemas") + + ok = areMappingsSubsetOfSchemas( + map[string]map[string]*sqlmanager_shared.ColumnInfo{ + "public.users": { + "id": &sqlmanager_shared.ColumnInfo{}, + }, + }, + []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id2"}, + }, + ) + require.False(t, ok, "job mappings contain mapping that is not in the source schema") + + ok = areMappingsSubsetOfSchemas( + map[string]map[string]*sqlmanager_shared.ColumnInfo{ + "public.users": { + "id": &sqlmanager_shared.ColumnInfo{}, + }, + }, + []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id"}, + {Schema: "public", Table: "users", Column: "created_by"}, + }, + ) + require.False(t, ok, "job mappings contain more mappings than are present in the source schema") +} + +func TestShouldHaltOnSchemaAddition(t *testing.T) { + ok := shouldHaltOnSchemaAddition( + map[string]map[string]*sqlmanager_shared.ColumnInfo{ + "public.users": { + "id": &sqlmanager_shared.ColumnInfo{}, + "created_by": &sqlmanager_shared.ColumnInfo{}, + }, + }, + []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id"}, + {Schema: "public", Table: "users", Column: "created_by"}, + }, + ) + require.False(t, ok, "job mappings are valid set of database schemas") + + ok = shouldHaltOnSchemaAddition( + map[string]map[string]*sqlmanager_shared.ColumnInfo{ + "public.users": { + "id": &sqlmanager_shared.ColumnInfo{}, + "created_by": &sqlmanager_shared.ColumnInfo{}, + }, + "neosync_api.accounts": { + "id": &sqlmanager_shared.ColumnInfo{}, + }, + }, + []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id"}, + {Schema: "public", Table: "users", Column: "created_by"}, + }, + ) + require.True(t, ok, "job mappings are missing database schema mappings") + + ok = shouldHaltOnSchemaAddition( + map[string]map[string]*sqlmanager_shared.ColumnInfo{ + "public.users": { + "id": &sqlmanager_shared.ColumnInfo{}, + "created_by": &sqlmanager_shared.ColumnInfo{}, + }, + }, + []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id"}, 
+ }, + ) + require.True(t, ok, "job mappings are missing table column") + + ok = shouldHaltOnSchemaAddition( + map[string]map[string]*sqlmanager_shared.ColumnInfo{ + "public.users": { + "id": &sqlmanager_shared.ColumnInfo{}, + "created_by": &sqlmanager_shared.ColumnInfo{}, + }, + }, + []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id"}, + {Schema: "public", Table: "users", Column: "updated_by"}, + }, + ) + require.True(t, ok, "job mappings have same column count, but missing specific column") +} + +func Test_buildProcessorConfigsMutation(t *testing.T) { + mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) + + ctx := context.Background() + + runconfig := tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{}, nil, []string{}, []string{}, []*tabledependency.DependsOn{}, false) + output, err := buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{}, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) + require.Nil(t, err) + require.Empty(t, output) + + output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{}, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) + require.Nil(t, err) + require.Empty(t, output) + + runconfig = tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{}, nil, []string{}, []string{"id"}, []*tabledependency.DependsOn{}, false) + output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id"}, + }, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) + require.Nil(t, err) + require.Empty(t, output) + + output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id", Transformer: &mgmtv1alpha1.JobMappingTransformer{}}, + }, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) + require.Nil(t, err) + require.Empty(t, output) + + output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, + }}}, + }, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) + require.Nil(t, err) + require.Empty(t, output) + + runconfig = tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{}, nil, []string{}, []string{"id", "name"}, []*tabledependency.DependsOn{}, false) + output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{ + Nullconfig: &mgmtv1alpha1.Null{}, + }, + }}}, + {Schema: "public", Table: "users", Column: "name", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: 
&mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{ + Nullconfig: &mgmtv1alpha1.Null{}, + }, + }}}, + }, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) + + require.Nil(t, err) + + require.Equal(t, *output[0].Mutation, "root.\"id\" = null\nroot.\"name\" = null") + + jsT := mgmtv1alpha1.SystemTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{ + TransformEmailConfig: &mgmtv1alpha1.TransformEmail{ + PreserveDomain: gotypeutil.ToPtr(true), + PreserveLength: gotypeutil.ToPtr(false), + ExcludedDomains: []string{}, + }, + }, + }, + } + + emailLength := 40 + + groupedSchemas := map[string]*sqlmanager_shared.ColumnInfo{ + + "email": { + OrdinalPosition: 2, + ColumnDefault: "", + IsNullable: true, + DataType: "timestamptz", + CharacterMaximumLength: &emailLength, + NumericPrecision: nil, + NumericScale: nil, + }, + } + + runconfig = tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{"id"}, nil, []string{"email"}, []string{"email"}, []*tabledependency.DependsOn{}, false) + output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "email", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT.Config}}}, groupedSchemas, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) + + require.Nil(t, err) + require.Equal(t, `root."email" = transform_email(value:this."email",preserve_length:false,preserve_domain:true,excluded_domains:[],max_length:40,email_type:"uuidv4",invalid_email_action:"reject")`, *output[0].Mutation) +} + +func Test_ShouldProcessColumnTrue(t *testing.T) { + val := &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateEmailConfig{ + GenerateEmailConfig: &mgmtv1alpha1.GenerateEmail{}, + }, + }, + } + + res := shouldProcessColumn(val) + require.Equal(t, true, res) +} + +func Test_ShouldProcessColumnFalse(t *testing.T) { + val := &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{ + PassthroughConfig: &mgmtv1alpha1.Passthrough{}, + }, + }, + } + + res := shouldProcessColumn(val) + require.Equal(t, false, res) +} + +func Test_buildProcessorConfigsJavascriptEmpty(t *testing.T) { + mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) + ctx := context.Background() + + jsT := mgmtv1alpha1.SystemTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformJavascriptConfig{ + TransformJavascriptConfig: &mgmtv1alpha1.TransformJavascript{ + Code: ``, + }, + }, + }, + } + + runconfig := tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{"id"}, nil, []string{"id"}, []string{"id"}, []*tabledependency.DependsOn{}, false) + resp, err := buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ + {Schema: "public", Table: "users", Column: "id", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT.Config}}}, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, + []string{}) + + require.NoError(t, err) + require.Empty(t, resp) +} + +func 
Test_convertUserDefinedFunctionConfig(t *testing.T) { + mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) + + ctx := context.Background() + + mockTransformerClient.On( + "GetUserDefinedTransformerById", + mock.Anything, + connect.NewRequest(&mgmtv1alpha1.GetUserDefinedTransformerByIdRequest{ + TransformerId: "123", + }), + ).Return(connect.NewResponse(&mgmtv1alpha1.GetUserDefinedTransformerByIdResponse{ + Transformer: &mgmtv1alpha1.UserDefinedTransformer{ + Id: "123", + Name: "stage", + Description: "description", + DataType: mgmtv1alpha1.TransformerDataType_TRANSFORMER_DATA_TYPE_STRING, + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_EMAIL, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{ + TransformEmailConfig: &mgmtv1alpha1.TransformEmail{ + PreserveDomain: gotypeutil.ToPtr(true), + PreserveLength: gotypeutil.ToPtr(false), + ExcludedDomains: []string{}, + }, + }, + }, + }, + }), nil) + + jmt := &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_UserDefinedTransformerConfig{ + UserDefinedTransformerConfig: &mgmtv1alpha1.UserDefinedTransformerConfig{ + Id: "123", + }, + }, + }, + } + + expected := &mgmtv1alpha1.JobMappingTransformer{ + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_EMAIL, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{ + TransformEmailConfig: &mgmtv1alpha1.TransformEmail{ + PreserveDomain: gotypeutil.ToPtr(true), + PreserveLength: gotypeutil.ToPtr(false), + ExcludedDomains: []string{}, + }, + }, + }, + } + + resp, err := convertUserDefinedFunctionConfig(ctx, mockTransformerClient, jmt) + require.NoError(t, err) + require.Equal(t, resp, expected) +} + +func Test_buildPlainInsertArgs(t *testing.T) { + require.Empty(t, buildPlainInsertArgs(nil)) + require.Empty(t, buildPlainInsertArgs([]string{})) + require.Equal(t, buildPlainInsertArgs([]string{"foo", "bar", "baz"}), `root = [this."foo", this."bar", this."baz"]`) +} + +func Test_buildPlainColumns(t *testing.T) { + require.Empty(t, buildPlainColumns(nil)) + require.Empty(t, buildPlainColumns([]*mgmtv1alpha1.JobMapping{})) + require.Equal( + t, + buildPlainColumns([]*mgmtv1alpha1.JobMapping{ + {Column: "foo"}, + {Column: "bar"}, + {Column: "baz"}, + }), + []string{"foo", "bar", "baz"}, + ) +} + +func Test_buildBenthosS3Credentials(t *testing.T) { + require.Nil(t, buildBenthosS3Credentials(nil)) + + require.Equal( + t, + buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{}), + &neosync_benthos.AwsCredentials{}, + ) + require.Equal( + t, + buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{Profile: shared.Ptr("foo")}), + &neosync_benthos.AwsCredentials{Profile: "foo"}, + ) + require.Equal( + t, + buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{AccessKeyId: shared.Ptr("foo")}), + &neosync_benthos.AwsCredentials{Id: "foo"}, + ) + require.Equal( + t, + buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{SecretAccessKey: shared.Ptr("foo")}), + &neosync_benthos.AwsCredentials{Secret: "foo"}, + ) + require.Equal( + t, + buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{SessionToken: shared.Ptr("foo")}), + &neosync_benthos.AwsCredentials{Token: "foo"}, + ) + require.Equal( + t, + buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{FromEc2Role: shared.Ptr(true)}), + &neosync_benthos.AwsCredentials{FromEc2Role: true}, + ) + require.Equal( + 
t, + buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{RoleArn: shared.Ptr("foo")}), + &neosync_benthos.AwsCredentials{Role: "foo"}, + ) + require.Equal( + t, + buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{RoleExternalId: shared.Ptr("foo")}), + &neosync_benthos.AwsCredentials{RoleExternalId: "foo"}, + ) + require.Equal( + t, + buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{ + Profile: shared.Ptr("profile"), + AccessKeyId: shared.Ptr("access-key"), + SecretAccessKey: shared.Ptr("secret"), + SessionToken: shared.Ptr("session"), + FromEc2Role: shared.Ptr(false), + RoleArn: shared.Ptr("role"), + RoleExternalId: shared.Ptr("foo"), + }), + &neosync_benthos.AwsCredentials{ + Profile: "profile", + Id: "access-key", + Secret: "secret", + Token: "session", + FromEc2Role: false, + Role: "role", + RoleExternalId: "foo", + }, + ) +} + +func Test_computeMutationFunction_null(t *testing.T) { + val, err := computeMutationFunction( + &mgmtv1alpha1.JobMapping{ + Transformer: &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{}}, + }, + }, &sqlmanager_shared.ColumnInfo{}, false) + require.NoError(t, err) + require.Equal(t, val, "null") +} + +func Test_computeMutationFunction_Validate_Bloblang_Output(t *testing.T) { + uuidEmailType := mgmtv1alpha1.GenerateEmailType_GENERATE_EMAIL_TYPE_UUID_V4 + transformers := []*mgmtv1alpha1.SystemTransformer{ + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_EMAIL, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateEmailConfig{ + GenerateEmailConfig: &mgmtv1alpha1.GenerateEmail{ + EmailType: &uuidEmailType, + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_EMAIL, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{ + TransformEmailConfig: &mgmtv1alpha1.TransformEmail{ + PreserveDomain: gotypeutil.ToPtr(false), + PreserveLength: gotypeutil.ToPtr(false), + ExcludedDomains: []string{"gmail", "yahoo"}, + EmailType: &uuidEmailType, + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_BOOL, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateBoolConfig{ + GenerateBoolConfig: &mgmtv1alpha1.GenerateBool{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_CARD_NUMBER, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateCardNumberConfig{ + GenerateCardNumberConfig: &mgmtv1alpha1.GenerateCardNumber{ + ValidLuhn: gotypeutil.ToPtr(true), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_CITY, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateCityConfig{ + GenerateCityConfig: &mgmtv1alpha1.GenerateCity{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_E164_PHONE_NUMBER, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateE164PhoneNumberConfig{ + GenerateE164PhoneNumberConfig: &mgmtv1alpha1.GenerateE164PhoneNumber{ + Min: gotypeutil.ToPtr(int64(9)), + Max: gotypeutil.ToPtr(int64(15)), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_FIRST_NAME, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateFirstNameConfig{ + 
GenerateFirstNameConfig: &mgmtv1alpha1.GenerateFirstName{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_FLOAT64, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateFloat64Config{ + GenerateFloat64Config: &mgmtv1alpha1.GenerateFloat64{ + RandomizeSign: gotypeutil.ToPtr(true), + Min: gotypeutil.ToPtr(1.00), + Max: gotypeutil.ToPtr(100.00), + Precision: gotypeutil.ToPtr(int64(6)), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_FULL_ADDRESS, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateFullAddressConfig{ + GenerateFullAddressConfig: &mgmtv1alpha1.GenerateFullAddress{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_FULL_NAME, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateFullNameConfig{ + GenerateFullNameConfig: &mgmtv1alpha1.GenerateFullName{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_GENDER, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateGenderConfig{ + GenerateGenderConfig: &mgmtv1alpha1.GenerateGender{ + Abbreviate: gotypeutil.ToPtr(false), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_INT64_PHONE_NUMBER, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateInt64PhoneNumberConfig{ + GenerateInt64PhoneNumberConfig: &mgmtv1alpha1.GenerateInt64PhoneNumber{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_INT64, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateInt64Config{ + GenerateInt64Config: &mgmtv1alpha1.GenerateInt64{ + RandomizeSign: gotypeutil.ToPtr(true), + Min: gotypeutil.ToPtr(int64(1)), + Max: gotypeutil.ToPtr(int64(40)), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_LAST_NAME, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateLastNameConfig{ + GenerateLastNameConfig: &mgmtv1alpha1.GenerateLastName{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_SHA256HASH, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateSha256HashConfig{ + GenerateSha256HashConfig: &mgmtv1alpha1.GenerateSha256Hash{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_SSN, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateSsnConfig{ + GenerateSsnConfig: &mgmtv1alpha1.GenerateSSN{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_STATE, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateStateConfig{ + GenerateStateConfig: &mgmtv1alpha1.GenerateState{ + GenerateFullName: gotypeutil.ToPtr(false), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_STREET_ADDRESS, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateStreetAddressConfig{ + GenerateStreetAddressConfig: &mgmtv1alpha1.GenerateStreetAddress{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_STRING_PHONE_NUMBER, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: 
&mgmtv1alpha1.TransformerConfig_GenerateStringPhoneNumberConfig{ + GenerateStringPhoneNumberConfig: &mgmtv1alpha1.GenerateStringPhoneNumber{ + Min: gotypeutil.ToPtr(int64(9)), + Max: gotypeutil.ToPtr(int64(14)), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_RANDOM_STRING, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateStringConfig{ + GenerateStringConfig: &mgmtv1alpha1.GenerateString{ + Min: gotypeutil.ToPtr(int64(2)), + Max: gotypeutil.ToPtr(int64(7)), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_UNIXTIMESTAMP, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateUnixtimestampConfig{ + GenerateUnixtimestampConfig: &mgmtv1alpha1.GenerateUnixTimestamp{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_USERNAME, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateUsernameConfig{ + GenerateUsernameConfig: &mgmtv1alpha1.GenerateUsername{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_UTCTIMESTAMP, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateUtctimestampConfig{ + GenerateUtctimestampConfig: &mgmtv1alpha1.GenerateUtcTimestamp{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_UUID, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateUuidConfig{ + GenerateUuidConfig: &mgmtv1alpha1.GenerateUuid{ + IncludeHyphens: gotypeutil.ToPtr(true), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_ZIPCODE, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateZipcodeConfig{ + GenerateZipcodeConfig: &mgmtv1alpha1.GenerateZipcode{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_E164_PHONE_NUMBER, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformE164PhoneNumberConfig{ + TransformE164PhoneNumberConfig: &mgmtv1alpha1.TransformE164PhoneNumber{ + PreserveLength: gotypeutil.ToPtr(false), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_FIRST_NAME, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformFirstNameConfig{ + TransformFirstNameConfig: &mgmtv1alpha1.TransformFirstName{ + PreserveLength: gotypeutil.ToPtr(false), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_FLOAT64, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformFloat64Config{ + TransformFloat64Config: &mgmtv1alpha1.TransformFloat64{ + RandomizationRangeMin: gotypeutil.ToPtr(20.00), + RandomizationRangeMax: gotypeutil.ToPtr(50.00), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_FULL_NAME, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformFullNameConfig{ + TransformFullNameConfig: &mgmtv1alpha1.TransformFullName{ + PreserveLength: gotypeutil.ToPtr(false), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_INT64_PHONE_NUMBER, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformInt64PhoneNumberConfig{ + 
TransformInt64PhoneNumberConfig: &mgmtv1alpha1.TransformInt64PhoneNumber{ + PreserveLength: gotypeutil.ToPtr(false), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_INT64, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformInt64Config{ + TransformInt64Config: &mgmtv1alpha1.TransformInt64{ + RandomizationRangeMin: gotypeutil.ToPtr(int64(20)), + RandomizationRangeMax: gotypeutil.ToPtr(int64(50)), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_LAST_NAME, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformLastNameConfig{ + TransformLastNameConfig: &mgmtv1alpha1.TransformLastName{ + PreserveLength: gotypeutil.ToPtr(false), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_PHONE_NUMBER, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformPhoneNumberConfig{ + TransformPhoneNumberConfig: &mgmtv1alpha1.TransformPhoneNumber{ + PreserveLength: gotypeutil.ToPtr(false), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_STRING, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformStringConfig{ + TransformStringConfig: &mgmtv1alpha1.TransformString{ + PreserveLength: gotypeutil.ToPtr(false), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_CATEGORICAL, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateCategoricalConfig{ + GenerateCategoricalConfig: &mgmtv1alpha1.GenerateCategorical{ + Categories: gotypeutil.ToPtr("value1,value2"), + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_CHARACTER_SCRAMBLE, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_TransformCharacterScrambleConfig{ + TransformCharacterScrambleConfig: &mgmtv1alpha1.TransformCharacterScramble{ + UserProvidedRegex: nil, + }, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_DEFAULT, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig{ + GenerateDefaultConfig: &mgmtv1alpha1.GenerateDefault{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_NULL, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{ + Nullconfig: &mgmtv1alpha1.Null{}, + }, + }, + }, + { + Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_COUNTRY, + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateCountryConfig{ + GenerateCountryConfig: &mgmtv1alpha1.GenerateCountry{ + GenerateFullName: gotypeutil.ToPtr(false), + }, + }, + }, + }, + } + + emailColInfo := &sqlmanager_shared.ColumnInfo{ + OrdinalPosition: 2, + ColumnDefault: "", + IsNullable: true, + DataType: "timestamptz", + CharacterMaximumLength: shared.Ptr(40), + NumericPrecision: nil, + NumericScale: nil, + } + + for _, transformer := range transformers { + t.Run(fmt.Sprintf("%s_%s_lint", t.Name(), transformer.Source), func(t *testing.T) { + val, err := computeMutationFunction( + &mgmtv1alpha1.JobMapping{ + Column: "email", + Transformer: &mgmtv1alpha1.JobMappingTransformer{ + Config: transformer.Config, + }, + }, emailColInfo, false) + require.NoError(t, err) + ex, err := 
bloblang.Parse(val) + require.NoError(t, err, fmt.Sprintf("transformer lint failed, check that the transformer string is being constructed correctly. Failing source: %s", transformer.Source)) + _, err = ex.Query(nil) + require.NoError(t, err) + }) + } +} + +func Test_computeMutationFunction_Validate_Bloblang_Output_EmptyConfigs(t *testing.T) { + transformers := []*mgmtv1alpha1.SystemTransformer{ + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateEmailConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateBoolConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateCardNumberConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateCityConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateE164PhoneNumberConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFirstNameConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFloat64Config{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFullAddressConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFullNameConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateGenderConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateInt64PhoneNumberConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateInt64Config{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateLastNameConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateSha256HashConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateSsnConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateStateConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateStreetAddressConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateStringPhoneNumberConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateStringConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateUnixtimestampConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateUsernameConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateUtctimestampConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateUuidConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateZipcodeConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformE164PhoneNumberConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: 
&mgmtv1alpha1.TransformerConfig_GenerateFirstNameConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFloat64Config{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformFullNameConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformInt64PhoneNumberConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformInt64Config{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformLastNameConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformPhoneNumberConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformStringConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateCategoricalConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformCharacterScrambleConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{}}, + }, + { + Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateCountryConfig{}}, + }, + } + + emailColInfo := &sqlmanager_shared.ColumnInfo{ + OrdinalPosition: 2, + ColumnDefault: "", + IsNullable: true, + DataType: "timestamptz", + CharacterMaximumLength: shared.Ptr(40), + NumericPrecision: nil, + NumericScale: nil, + } + + for _, transformer := range transformers { + t.Run(fmt.Sprintf("%s_%s_lint", t.Name(), transformer.Source), func(t *testing.T) { + val, err := computeMutationFunction( + &mgmtv1alpha1.JobMapping{ + Column: "email", + Transformer: &mgmtv1alpha1.JobMappingTransformer{ + Config: transformer.Config, + }, + }, emailColInfo, false) + require.NoError(t, err) + ex, err := bloblang.Parse(val) + require.NoError(t, err, fmt.Sprintf("transformer lint failed, check that the transformer string is being constructed correctly. 
Failing source: %s", transformer.Source)) + _, err = ex.Query(nil) + require.NoError(t, err) + }) + } +} + +func Test_computeMutationFunction_handles_Db_Maxlen(t *testing.T) { + type testcase struct { + jm *mgmtv1alpha1.JobMapping + ci *sqlmanager_shared.ColumnInfo + expected string + } + jm := &mgmtv1alpha1.JobMapping{ + Transformer: &mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateStringConfig{ + GenerateStringConfig: &mgmtv1alpha1.GenerateString{ + Min: gotypeutil.ToPtr(int64(2)), + Max: gotypeutil.ToPtr(int64(7)), + }, + }, + }, + }, + } + testcases := []testcase{ + { + jm: jm, + ci: &sqlmanager_shared.ColumnInfo{}, + expected: "generate_string(min:2,max:7)", + }, + { + jm: jm, + ci: &sqlmanager_shared.ColumnInfo{ + CharacterMaximumLength: nil, + }, + expected: "generate_string(min:2,max:7)", + }, + { + jm: jm, + ci: &sqlmanager_shared.ColumnInfo{ + CharacterMaximumLength: shared.Ptr(-1), + }, + expected: "generate_string(min:2,max:7)", + }, + { + jm: jm, + ci: &sqlmanager_shared.ColumnInfo{ + CharacterMaximumLength: shared.Ptr(0), + }, + expected: "generate_string(min:2,max:7)", + }, + { + jm: jm, + ci: &sqlmanager_shared.ColumnInfo{ + CharacterMaximumLength: shared.Ptr(10), + }, + expected: "generate_string(min:2,max:7)", + }, + { + jm: jm, + ci: &sqlmanager_shared.ColumnInfo{ + CharacterMaximumLength: shared.Ptr(3), + }, + expected: "generate_string(min:2,max:3)", + }, + { + jm: jm, + ci: &sqlmanager_shared.ColumnInfo{ + CharacterMaximumLength: shared.Ptr(1), + }, + expected: "generate_string(min:1,max:1)", + }, + } + + for _, tc := range testcases { + t.Run(t.Name(), func(t *testing.T) { + out, err := computeMutationFunction(tc.jm, tc.ci, false) + require.NoError(t, err) + require.NotNil(t, out) + require.Equal(t, tc.expected, out, "computed bloblang string was not expected") + ex, err := bloblang.Parse(out) + require.NoError(t, err) + _, err = ex.Query(nil) + require.NoError(t, err) + }) + } +} + +func Test_buildBranchCacheConfigs_null(t *testing.T) { + cols := []*mgmtv1alpha1.JobMapping{ + { + Schema: "public", + Table: "users", + Column: "user_id", + }, + } + + constraints := map[string][]*bb_internal.ReferenceKey{ + "name": { + { + Table: "public.orders", + Column: "buyer_id", + }, + }, + } + + resp, err := buildBranchCacheConfigs(cols, constraints, mockJobId, mockRunId, nil) + require.NoError(t, err) + require.Len(t, resp, 0) +} + +func Test_buildBranchCacheConfigs_missing_redis(t *testing.T) { + cols := []*mgmtv1alpha1.JobMapping{ + { + Schema: "public", + Table: "users", + Column: "user_id", + }, + } + + constraints := map[string][]*bb_internal.ReferenceKey{ + "user_id": { + { + Table: "public.orders", + Column: "buyer_id", + }, + }, + } + + _, err := buildBranchCacheConfigs(cols, constraints, mockJobId, mockRunId, nil) + require.Error(t, err) +} + +func Test_buildBranchCacheConfigs_success(t *testing.T) { + cols := []*mgmtv1alpha1.JobMapping{ + { + Schema: "public", + Table: "users", + Column: "user_id", + }, + { + Schema: "public", + Table: "users", + Column: "name", + }, + } + + constraints := map[string][]*bb_internal.ReferenceKey{ + "user_id": { + { + Table: "public.orders", + Column: "buyer_id", + }, + }, + } + redisConfig := &shared.RedisConfig{ + Url: "redis://localhost:6379", + Kind: "simple", + } + + resp, err := buildBranchCacheConfigs(cols, constraints, mockJobId, mockRunId, redisConfig) + + require.NoError(t, err) + require.Len(t, resp, 1) + require.Equal(t, *resp[0].RequestMap, 
`root = if this."user_id" == null { deleted() } else { this }`) + require.Equal(t, *resp[0].ResultMap, `root."user_id" = this`) +} + +func Test_buildBranchCacheConfigs_self_referencing(t *testing.T) { + cols := []*mgmtv1alpha1.JobMapping{ + { + Schema: "public", + Table: "users", + Column: "user_id", + }, + } + + constraints := map[string][]*bb_internal.ReferenceKey{ + "user_id": { + { + Table: "public.users", + Column: "other_id", + }, + }, + } + redisConfig := &shared.RedisConfig{ + Url: "redis://localhost:6379", + Kind: "simple", + } + + resp, err := buildBranchCacheConfigs(cols, constraints, mockJobId, mockRunId, redisConfig) + require.NoError(t, err) + require.Len(t, resp, 0) +} + +func Test_getPrimaryKeyDependencyMap(t *testing.T) { + tableDependencies := map[string][]*sqlmanager_shared.ForeignConstraint{ + "hr.countries": { + { + Columns: []string{"region_id"}, + NotNullable: []bool{true}, + ForeignKey: &sqlmanager_shared.ForeignKey{ + Table: "hr.regions", + Columns: []string{"region_id"}, + }, + }, + }, + "hr.departments": { + { + Columns: []string{"location_id"}, + NotNullable: []bool{false}, + ForeignKey: &sqlmanager_shared.ForeignKey{ + Table: "hr.locations", + Columns: []string{"location_id"}, + }, + }, + }, + "hr.dependents": { + { + Columns: []string{"employee_id"}, + NotNullable: []bool{true}, + ForeignKey: &sqlmanager_shared.ForeignKey{ + Table: "hr.employees", + Columns: []string{"employee_id"}, + }, + }, + }, + "hr.employees": { + { + Columns: []string{"job_id"}, + NotNullable: []bool{true}, + ForeignKey: &sqlmanager_shared.ForeignKey{ + Table: "hr.jobs", + Columns: []string{"job_id"}, + }, + }, + { + Columns: []string{"department_id"}, + NotNullable: []bool{false}, + ForeignKey: &sqlmanager_shared.ForeignKey{ + Table: "hr.departments", + Columns: []string{"department_id"}, + }, + }, + { + Columns: []string{"manager_id"}, + NotNullable: []bool{false}, + ForeignKey: &sqlmanager_shared.ForeignKey{ + Table: "hr.employees", + Columns: []string{"employee_id"}, + }, + }, + }, + "hr.locations": { + { + Columns: []string{"country_id"}, + NotNullable: []bool{true}, + ForeignKey: &sqlmanager_shared.ForeignKey{ + Table: "hr.countries", + Columns: []string{"country_id"}, + }, + }, + }, + } + + expected := map[string]map[string][]*bb_internal.ReferenceKey{ + "hr.regions": { + "region_id": { + { + Table: "hr.countries", + Column: "region_id", + }, + }, + }, + "hr.locations": { + "location_id": { + { + Table: "hr.departments", + Column: "location_id", + }, + }, + }, + "hr.employees": { + "employee_id": { + { + Table: "hr.dependents", + Column: "employee_id", + }, + { + Table: "hr.employees", + Column: "manager_id", + }, + }, + }, + "hr.jobs": { + "job_id": { + { + Table: "hr.employees", + Column: "job_id", + }, + }, + }, + "hr.departments": { + "department_id": { + { + Table: "hr.employees", + Column: "department_id", + }, + }, + }, + "hr.countries": { + "country_id": { + { + Table: "hr.locations", + Column: "country_id", + }, + }, + }, + } + + actual := getPrimaryKeyDependencyMap(tableDependencies) + for table, depsMap := range expected { + actualDepsMap := actual[table] + require.NotNil(t, actualDepsMap) + for col, deps := range depsMap { + actualDeps := actualDepsMap[col] + require.ElementsMatch(t, deps, actualDeps) + } + } +} + +func Test_getPrimaryKeyDependencyMap_compositekeys(t *testing.T) { + tableDependencies := map[string][]*sqlmanager_shared.ForeignConstraint{ + "employees": { + { + Columns: []string{"department_id"}, + NotNullable: []bool{false}, + ForeignKey: 
&sqlmanager_shared.ForeignKey{ + Table: "department", + Columns: []string{"department_id"}, + }, + }, + }, + "projects": { + { + Columns: []string{"responsible_employee_id", "responsible_department_id"}, + NotNullable: []bool{true}, + ForeignKey: &sqlmanager_shared.ForeignKey{ + Table: "employees", + Columns: []string{"employee_id", "department_id"}, + }, + }, + }, + } + + expected := map[string]map[string][]*bb_internal.ReferenceKey{ + "department": { + "department_id": { + { + Table: "employees", + Column: "department_id", + }, + }, + }, + "employees": { + "employee_id": {{ + Table: "projects", + Column: "responsible_employee_id", + }}, + "department_id": {{ + Table: "projects", + Column: "responsible_department_id", + }}, + }, + } + + actual := getPrimaryKeyDependencyMap(tableDependencies) + require.Equal(t, expected, actual) +} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-dynamodb.go b/internal/benthos/benthos-builder/builders/dynamodb.go similarity index 54% rename from worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-dynamodb.go rename to internal/benthos/benthos-builder/builders/dynamodb.go index 333e1cc0d4..6e8bcfa30f 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-dynamodb.go +++ b/internal/benthos/benthos-builder/builders/dynamodb.go @@ -1,38 +1,37 @@ -package genbenthosconfigs_activity +package benthosbuilder_builders import ( "context" + "errors" "fmt" - "log/slog" "slices" mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" "github.com/nucleuscloud/neosync/backend/pkg/metrics" sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" awsmanager "github.com/nucleuscloud/neosync/internal/aws" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" ) -type dynamoSyncResp struct { - BenthosConfigs []*BenthosConfigResponse +type dyanmodbSyncBuilder struct { + transformerclient mgmtv1alpha1connect.TransformersServiceClient } -func (b *benthosBuilder) getDynamoDbSyncBenthosConfigResponses( - ctx context.Context, - job *mgmtv1alpha1.Job, - slogger *slog.Logger, -) (*dynamoSyncResp, error) { - sourceConnection, err := shared.GetJobSourceConnection(ctx, job.GetSource(), b.connclient) - if err != nil { - return nil, fmt.Errorf("unable to get source connection by id: %w", err) +func NewDynamoDbSyncBuilder( + transformerclient mgmtv1alpha1connect.TransformersServiceClient, +) bb_internal.BenthosBuilder { + return &dyanmodbSyncBuilder{ + transformerclient: transformerclient, } - sourceConnectionType := shared.GetConnectionType(sourceConnection) - slogger = slogger.With( - "sourceConnectionType", sourceConnectionType, - ) - _ = slogger +} + +func (b *dyanmodbSyncBuilder) BuildSourceConfigs(ctx context.Context, params *bb_internal.SourceParams) ([]*bb_internal.BenthosSourceConfig, error) { + sourceConnection := params.SourceConnection + job := params.Job dynamoSourceConfig := sourceConnection.GetConnectionConfig().GetDynamodbConfig() if dynamoSourceConfig == nil { @@ -49,7 +48,7 @@ func (b *benthosBuilder) getDynamoDbSyncBenthosConfigResponses( groupedMappings := groupMappingsByTable(job.GetMappings()) - 
benthosConfigs := []*BenthosConfigResponse{} + benthosConfigs := []*bb_internal.BenthosSourceConfig{} // todo: may need to filter here based on the destination config mappings if there is no source->destination table map for _, tableMapping := range groupedMappings { bc := &neosync_benthos.BenthosConfig{ @@ -102,10 +101,10 @@ func (b *benthosBuilder) getDynamoDbSyncBenthosConfigResponses( ctx, b.transformerclient, tabledependency.NewRunConfig(tableMapping.Table, tabledependency.RunTypeInsert, []string{}, nil, columns, columns, nil, splitColumnPaths), - map[string][]*referenceKey{}, - map[string][]*referenceKey{}, - b.jobId, - b.runId, + map[string][]*bb_internal.ReferenceKey{}, + map[string][]*bb_internal.ReferenceKey{}, + params.Job.Id, + params.RunId, &shared.RedisConfig{}, tableMapping.Mappings, map[string]*sqlmanager_shared.ColumnInfo{}, @@ -119,7 +118,7 @@ func (b *benthosBuilder) getDynamoDbSyncBenthosConfigResponses( bc.StreamConfig.Pipeline.Processors = append(bc.StreamConfig.Pipeline.Processors, *pc) } - benthosConfigs = append(benthosConfigs, &BenthosConfigResponse{ + benthosConfigs = append(benthosConfigs, &bb_internal.BenthosSourceConfig{ Config: bc, Name: fmt.Sprintf("%s.%s", tableMapping.Schema, tableMapping.Table), // todo TableSchema: tableMapping.Schema, @@ -128,8 +127,7 @@ func (b *benthosBuilder) getDynamoDbSyncBenthosConfigResponses( DependsOn: []*tabledependency.DependsOn{}, Columns: columns, - SourceConnectionType: sourceConnectionType, - metriclabels: metrics.MetricLabels{ + Metriclabels: metrics.MetricLabels{ metrics.NewEqLabel(metrics.TableSchemaLabel, tableMapping.Schema), metrics.NewEqLabel(metrics.TableNameLabel, tableMapping.Table), metrics.NewEqLabel(metrics.JobTypeLabel, "sync"), @@ -137,9 +135,54 @@ func (b *benthosBuilder) getDynamoDbSyncBenthosConfigResponses( }) } - return &dynamoSyncResp{ - BenthosConfigs: benthosConfigs, - }, nil + return benthosConfigs, nil +} + +func (b *dyanmodbSyncBuilder) BuildDestinationConfig(ctx context.Context, params *bb_internal.DestinationParams) (*bb_internal.BenthosDestinationConfig, error) { + config := &bb_internal.BenthosDestinationConfig{} + + benthosConfig := params.SourceConfig + destinationOpts := params.DestinationOpts + + dynamoConfig := params.DestConnection.GetConnectionConfig().GetDynamodbConfig() + if dynamoConfig == nil { + return nil, errors.New("destination must have configured dyanmodb config") + } + dynamoDestinationOpts := destinationOpts.GetDynamodbOptions() + if dynamoDestinationOpts == nil { + return nil, errors.New("destination must have configured dyanmodb options") + } + tableMap := map[string]string{} + for _, tm := range dynamoDestinationOpts.GetTableMappings() { + tableMap[tm.GetSourceTable()] = tm.GetDestinationTable() + } + mappedTable, ok := tableMap[benthosConfig.TableName] + if !ok { + return nil, fmt.Errorf("did not find table map for %q when building dynamodb destination config", benthosConfig.TableName) + } + config.Outputs = append(config.Outputs, neosync_benthos.Outputs{ + AwsDynamoDB: &neosync_benthos.OutputAwsDynamoDB{ + Table: mappedTable, + JsonMapColumns: map[string]string{ + "": ".", + }, + + Batching: &neosync_benthos.Batching{ + // https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html + // A single call to BatchWriteItem can transmit up to 16MB of data over the network, consisting of up to 25 item put or delete operations + // Specifying the count here may not be enough if the overall data is above 16MB. 
+ // Benthos will fall back on error to single writes however + Period: "5s", + Count: 25, + }, + + Region: dynamoConfig.GetRegion(), + Endpoint: dynamoConfig.GetEndpoint(), + Credentials: buildBenthosS3Credentials(dynamoConfig.GetCredentials()), + }, + }) + + return config, nil } func getWhereFromSourceTableOption(opt *mgmtv1alpha1.DynamoDBSourceTableOption) *string { @@ -156,3 +199,33 @@ func toDynamoDbSourceTableOptionMap(tableOpts []*mgmtv1alpha1.DynamoDBSourceTabl } return output } + +func buildBenthosS3Credentials(mgmtCreds *mgmtv1alpha1.AwsS3Credentials) *neosync_benthos.AwsCredentials { + if mgmtCreds == nil { + return nil + } + creds := &neosync_benthos.AwsCredentials{} + if mgmtCreds.Profile != nil { + creds.Profile = *mgmtCreds.Profile + } + if mgmtCreds.AccessKeyId != nil { + creds.Id = *mgmtCreds.AccessKeyId + } + if mgmtCreds.SecretAccessKey != nil { + creds.Secret = *mgmtCreds.SecretAccessKey + } + if mgmtCreds.SessionToken != nil { + creds.Token = *mgmtCreds.SessionToken + } + if mgmtCreds.FromEc2Role != nil { + creds.FromEc2Role = *mgmtCreds.FromEc2Role + } + if mgmtCreds.RoleArn != nil { + creds.Role = *mgmtCreds.RoleArn + } + if mgmtCreds.RoleExternalId != nil { + creds.RoleExternalId = *mgmtCreds.RoleExternalId + } + + return creds +} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-dynamodb_test.go b/internal/benthos/benthos-builder/builders/dynamodb_test.go similarity index 98% rename from worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-dynamodb_test.go rename to internal/benthos/benthos-builder/builders/dynamodb_test.go index 6ae78b983c..fb39251786 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-dynamodb_test.go +++ b/internal/benthos/benthos-builder/builders/dynamodb_test.go @@ -1,4 +1,4 @@ -package genbenthosconfigs_activity +package benthosbuilder_builders import ( "reflect" diff --git a/internal/benthos/benthos-builder/builders/gcp-cloud-storage.go b/internal/benthos/benthos-builder/builders/gcp-cloud-storage.go new file mode 100644 index 0000000000..2c7079c662 --- /dev/null +++ b/internal/benthos/benthos-builder/builders/gcp-cloud-storage.go @@ -0,0 +1,88 @@ +package benthosbuilder_builders + +import ( + "context" + "errors" + "strings" + + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" + "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" +) + +type gcpCloudStorageSyncBuilder struct { +} + +func NewGcpCloudStorageSyncBuilder() bb_internal.BenthosBuilder { + return &gcpCloudStorageSyncBuilder{} +} + +func (b *gcpCloudStorageSyncBuilder) BuildSourceConfigs(ctx context.Context, params *bb_internal.SourceParams) ([]*bb_internal.BenthosSourceConfig, error) { + return nil, errors.ErrUnsupported +} + +func (b *gcpCloudStorageSyncBuilder) BuildDestinationConfig(ctx context.Context, params *bb_internal.DestinationParams) (*bb_internal.BenthosDestinationConfig, error) { + config := &bb_internal.BenthosDestinationConfig{} + + benthosConfig := params.SourceConfig + if benthosConfig.RunType == tabledependency.RunTypeUpdate { + return config, nil + } + destinationOpts := params.DestinationOpts.GetAwsS3Options() + gcpCloudStorageConfig := params.DestConnection.GetConnectionConfig().GetGcpCloudstorageConfig() + + if destinationOpts == nil { + return nil, errors.New("destination 
must have configured GCP Cloud Storage options") + } + if gcpCloudStorageConfig == nil { + return nil, errors.New("destination must have configured GCP Cloud Storage config") + } + + pathpieces := []string{} + if gcpCloudStorageConfig.GetPathPrefix() != "" { + pathpieces = append(pathpieces, strings.Trim(gcpCloudStorageConfig.GetPathPrefix(), "/")) + } + + pathpieces = append( + pathpieces, + "workflows", + params.RunId, + "activities", + neosync_benthos.BuildBenthosTable(benthosConfig.TableSchema, benthosConfig.TableName), + "data", + `${!count("files")}.txt.gz`, + ) + + config.Outputs = append(config.Outputs, neosync_benthos.Outputs{ + Fallback: []neosync_benthos.Outputs{ + { + GcpCloudStorage: &neosync_benthos.GcpCloudStorageOutput{ + Bucket: gcpCloudStorageConfig.GetBucket(), + MaxInFlight: 64, + Path: strings.Join(pathpieces, "/"), + ContentType: shared.Ptr("txt/plain"), + ContentEncoding: shared.Ptr("gzip"), + Batching: &neosync_benthos.Batching{ + Count: 100, + Period: "5s", + Processors: []*neosync_benthos.BatchProcessor{ + {Archive: &neosync_benthos.ArchiveProcessor{Format: "lines"}}, + {Compress: &neosync_benthos.CompressProcessor{Algorithm: "gzip"}}, + }, + }, + }, + }, + // kills activity depending on error + {Error: &neosync_benthos.ErrorOutputConfig{ + ErrorMsg: `${! meta("fallback_error")}`, + Batching: &neosync_benthos.Batching{ + Period: "5s", + Count: 100, + }, + }}, + }, + }) + + return config, nil +} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/ai-generate.go b/internal/benthos/benthos-builder/builders/generate-ai.go similarity index 56% rename from worker/pkg/workflows/datasync/activities/gen-benthos-configs/ai-generate.go rename to internal/benthos/benthos-builder/builders/generate-ai.go index 2e6a53c9b9..24eddcb916 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/ai-generate.go +++ b/internal/benthos/benthos-builder/builders/generate-ai.go @@ -1,20 +1,45 @@ -package genbenthosconfigs_activity +package benthosbuilder_builders import ( "context" "errors" "fmt" - "log/slog" mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" "github.com/nucleuscloud/neosync/backend/pkg/metrics" + "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager" sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" ) +type generateAIBuilder struct { + transformerclient mgmtv1alpha1connect.TransformersServiceClient + sqlmanagerclient sqlmanager.SqlManagerClient + connectionclient mgmtv1alpha1connect.ConnectionServiceClient + driver string + aiGroupedTableCols map[string][]string +} + +func NewGenerateAIBuilder( + transformerclient mgmtv1alpha1connect.TransformersServiceClient, + sqlmanagerclient sqlmanager.SqlManagerClient, + connectionclient mgmtv1alpha1connect.ConnectionServiceClient, + driver string, +) bb_internal.BenthosBuilder { + return &generateAIBuilder{ + transformerclient: transformerclient, + sqlmanagerclient: sqlmanagerclient, + connectionclient: connectionclient, + driver: driver, + 
aiGroupedTableCols: map[string][]string{}, + } +} + type aiGenerateMappings struct { Schema string Table string @@ -26,41 +51,31 @@ type aiGenerateColumn struct { DataType string } -func (b *benthosBuilder) getAiGenerateBenthosConfigResponses( - ctx context.Context, - job *mgmtv1alpha1.Job, - slogger *slog.Logger, -) ([]*BenthosConfigResponse, map[string][]string, error) { - jobSource := job.GetSource() - sourceOptions := job.GetSource().GetOptions().GetAiGenerate() +func (b *generateAIBuilder) BuildSourceConfigs(ctx context.Context, params *bb_internal.SourceParams) ([]*bb_internal.BenthosSourceConfig, error) { + jobSource := params.Job.GetSource() + sourceOptions := jobSource.GetOptions().GetAiGenerate() if sourceOptions == nil { - return nil, nil, fmt.Errorf("job does not have AiGenerate source options, has: %T", jobSource.GetOptions().Config) - } - sourceConnection, err := shared.GetJobSourceConnection(ctx, job.GetSource(), b.connclient) - if err != nil { - return nil, nil, err + return nil, fmt.Errorf("job does not have AiGenerate source options, has: %T", jobSource.GetOptions().Config) } - sourceConnectionType := shared.GetConnectionType(sourceConnection) - slogger = slogger.With( - "sourceConnectionType", sourceConnectionType, - ) + sourceConnection := params.SourceConnection + openaiConfig := sourceConnection.GetConnectionConfig().GetOpenaiConfig() if openaiConfig == nil { - return nil, nil, errors.New("configured source connection is not an openai configuration") + return nil, errors.New("configured source connection is not an openai configuration") } - constraintConnection, err := getConstraintConnection(ctx, jobSource, b.connclient, shared.GetConnectionById) + constraintConnection, err := getConstraintConnection(ctx, jobSource, b.connectionclient, shared.GetConnectionById) if err != nil { - return nil, nil, err + return nil, err } - db, err := b.sqlmanagerclient.NewPooledSqlDb(ctx, slogger, constraintConnection) + db, err := b.sqlmanagerclient.NewPooledSqlDb(ctx, params.Logger, constraintConnection) if err != nil { - return nil, nil, fmt.Errorf("unable to create new sql db: %w", err) + return nil, fmt.Errorf("unable to create new sql db: %w", err) } defer db.Db.Close() groupedSchemas, err := db.Db.GetSchemaColumnMap(ctx) if err != nil { - return nil, nil, fmt.Errorf("unable to get database schema for connection: %w", err) + return nil, fmt.Errorf("unable to get database schema for connection: %w", err) } mappings := []*aiGenerateMappings{} @@ -70,7 +85,7 @@ func (b *benthosBuilder) getAiGenerateBenthosConfigResponses( tableColsMap, ok := groupedSchemas[sqlmanager_shared.BuildTable(schema.GetSchema(), table.GetTable())] if !ok { - return nil, nil, fmt.Errorf("did not find schema data when building AI Generate config: %s", schema.GetSchema()) + return nil, fmt.Errorf("did not find schema data when building AI Generate config: %s", schema.GetSchema()) } for col, info := range tableColsMap { columns = append(columns, &aiGenerateColumn{ @@ -88,7 +103,7 @@ func (b *benthosBuilder) getAiGenerateBenthosConfigResponses( } } if len(mappings) == 0 { - return nil, nil, fmt.Errorf("did not generate any mapping configs during AI Generate build for connection: %s", constraintConnection.GetId()) + return nil, fmt.Errorf("did not generate any mapping configs during AI Generate build for connection: %s", constraintConnection.GetId()) } var userPrompt *string @@ -107,7 +122,6 @@ func (b *benthosBuilder) getAiGenerateBenthosConfigResponses( sourceOptions.GetModelName(), userPrompt, userBatchSize, - 
sourceConnectionType, ) // builds a map of table key to columns for AI Generated schemas as they are calculated lazily instead of via job mappings @@ -118,8 +132,9 @@ func (b *benthosBuilder) getAiGenerateBenthosConfigResponses( aiGroupedTableCols[key] = append(aiGroupedTableCols[key], col.Column) } } + b.aiGroupedTableCols = aiGroupedTableCols - return sourceResponses, aiGroupedTableCols, nil + return sourceResponses, nil } func buildBenthosAiGenerateSourceConfigResponses( @@ -128,9 +143,8 @@ func buildBenthosAiGenerateSourceConfigResponses( model string, userPrompt *string, userBatchSize *int, - sourceConnectionType string, -) []*BenthosConfigResponse { - responses := []*BenthosConfigResponse{} +) []*bb_internal.BenthosSourceConfig { + responses := []*bb_internal.BenthosSourceConfig{} for _, tableMapping := range mappings { columns := []string{} @@ -177,7 +191,7 @@ func buildBenthosAiGenerateSourceConfigResponses( }, } - responses = append(responses, &BenthosConfigResponse{ + responses = append(responses, &bb_internal.BenthosSourceConfig{ Name: neosync_benthos.BuildBenthosTable(tableMapping.Schema, tableMapping.Table), // todo: may need to expand on this Config: bc, DependsOn: []*tabledependency.DependsOn{}, @@ -185,8 +199,7 @@ func buildBenthosAiGenerateSourceConfigResponses( TableSchema: tableMapping.Schema, TableName: tableMapping.Table, - SourceConnectionType: sourceConnectionType, - metriclabels: metrics.MetricLabels{ + Metriclabels: metrics.MetricLabels{ metrics.NewEqLabel(metrics.TableSchemaLabel, tableMapping.Schema), metrics.NewEqLabel(metrics.TableNameLabel, tableMapping.Table), metrics.NewEqLabel(metrics.JobTypeLabel, "ai-generate"), @@ -197,6 +210,70 @@ func buildBenthosAiGenerateSourceConfigResponses( return responses } +func (b *generateAIBuilder) BuildDestinationConfig(ctx context.Context, params *bb_internal.DestinationParams) (*bb_internal.BenthosDestinationConfig, error) { + config := &bb_internal.BenthosDestinationConfig{} + + benthosConfig := params.SourceConfig + destOpts := getDestinationOptions(params.DestinationOpts) + tableKey := neosync_benthos.BuildBenthosTable(benthosConfig.TableSchema, benthosConfig.TableName) + + cols, ok := b.aiGroupedTableCols[tableKey] + if !ok { + return nil, fmt.Errorf("unable to find table columns for key (%s) when building destination connection", tableKey) + } + + processorConfigs := []neosync_benthos.ProcessorConfig{} + for _, pc := range benthosConfig.Processors { + processorConfigs = append(processorConfigs, *pc) + } + + config.BenthosDsns = append(config.BenthosDsns, &bb_shared.BenthosDsn{EnvVarKey: params.DestEnvVarKey, ConnectionId: params.DestConnection.Id}) + config.Outputs = append(config.Outputs, neosync_benthos.Outputs{ + Fallback: []neosync_benthos.Outputs{ + { + // retry processor and output several times + Retry: &neosync_benthos.RetryConfig{ + InlineRetryConfig: neosync_benthos.InlineRetryConfig{ + MaxRetries: 10, + }, + Output: neosync_benthos.OutputConfig{ + Outputs: neosync_benthos.Outputs{ + PooledSqlInsert: &neosync_benthos.PooledSqlInsert{ + Driver: b.driver, + Dsn: params.DSN, + + Schema: benthosConfig.TableSchema, + Table: benthosConfig.TableName, + Columns: cols, + OnConflictDoNothing: destOpts.OnConflictDoNothing, + TruncateOnRetry: destOpts.Truncate, + + ArgsMapping: buildPlainInsertArgs(cols), + + Batching: &neosync_benthos.Batching{ + Period: "5s", + Count: 100, + }, + }, + }, + Processors: processorConfigs, + }, + }, + }, + // kills activity depending on error + {Error: &neosync_benthos.ErrorOutputConfig{ 
+ ErrorMsg: `${! meta("fallback_error")}`, + Batching: &neosync_benthos.Batching{ + Period: "5s", + Count: 100, + }, + }}, + }, + }) + + return config, nil +} + func getConstraintConnection( ctx context.Context, jobSource *mgmtv1alpha1.JobSource, diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/generate.go b/internal/benthos/benthos-builder/builders/generate.go similarity index 55% rename from worker/pkg/workflows/datasync/activities/gen-benthos-configs/generate.go rename to internal/benthos/benthos-builder/builders/generate.go index 5216b14c37..65e2be0387 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/generate.go +++ b/internal/benthos/benthos-builder/builders/generate.go @@ -1,39 +1,57 @@ -package genbenthosconfigs_activity +package benthosbuilder_builders import ( "context" "fmt" - "log/slog" mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" "github.com/nucleuscloud/neosync/backend/pkg/metrics" - sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager" tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" ) -func (b *benthosBuilder) getGenerateBenthosConfigResponses( - ctx context.Context, - job *mgmtv1alpha1.Job, - slogger *slog.Logger, -) ([]*BenthosConfigResponse, error) { +type generateBuilder struct { + transformerclient mgmtv1alpha1connect.TransformersServiceClient + sqlmanagerclient sqlmanager.SqlManagerClient + connectionclient mgmtv1alpha1connect.ConnectionServiceClient + driver string +} + +func NewGenerateBuilder( + transformerclient mgmtv1alpha1connect.TransformersServiceClient, + sqlmanagerclient sqlmanager.SqlManagerClient, + connectionclient mgmtv1alpha1connect.ConnectionServiceClient, + driver string, +) bb_internal.BenthosBuilder { + return &generateBuilder{ + transformerclient: transformerclient, + sqlmanagerclient: sqlmanagerclient, + connectionclient: connectionclient, + driver: driver, + } +} + +func (b *generateBuilder) BuildSourceConfigs(ctx context.Context, params *bb_internal.SourceParams) ([]*bb_internal.BenthosSourceConfig, error) { + logger := params.Logger + job := params.Job + configs := []*bb_internal.BenthosSourceConfig{} + jobSource := job.GetSource() - sourceOptions := job.GetSource().GetOptions().GetGenerate() + sourceOptions := jobSource.GetOptions().GetGenerate() if sourceOptions == nil { return nil, fmt.Errorf("job does not have Generate source options, has: %T", jobSource.GetOptions().Config) } - sourceConnection, err := shared.GetJobSourceConnection(ctx, job.GetSource(), b.connclient) + sourceConnection, err := shared.GetJobSourceConnection(ctx, jobSource, b.connectionclient) if err != nil { return nil, fmt.Errorf("unable to get connection by id: %w", err) } - sourceConnectionType := shared.GetConnectionType(sourceConnection) - slogger = slogger.With( - "sourceConnectionType", sourceConnectionType, - ) - db, err := b.sqlmanagerclient.NewPooledSqlDb(ctx, slogger, sourceConnection) + db, err := b.sqlmanagerclient.NewPooledSqlDb(ctx, logger, 
sourceConnection) if err != nil { return nil, fmt.Errorf("unable to create new sql db: %w", err) } @@ -48,32 +66,7 @@ func (b *benthosBuilder) getGenerateBenthosConfigResponses( return nil, fmt.Errorf("unable to get database schema for connection: %w", err) } - sourceResponses, err := buildBenthosGenerateSourceConfigResponses(slogger, ctx, b.transformerclient, groupedMappings, sourceTableOpts, groupedSchemas, colTransformerMap, db.Driver, sourceConnectionType) - if err != nil { - return nil, fmt.Errorf("unable to build benthos generate source config responses: %w", err) - } - - return sourceResponses, nil -} - -type generateSourceTableOptions struct { - Count int -} - -func buildBenthosGenerateSourceConfigResponses( - slogger *slog.Logger, - ctx context.Context, - transformerclient mgmtv1alpha1connect.TransformersServiceClient, - mappings []*tableMapping, - sourceTableOpts map[string]*generateSourceTableOptions, - groupedcolumnInfo map[string]map[string]*sqlmanager_shared.ColumnInfo, - groupedColTransformers map[string]map[string]*mgmtv1alpha1.JobMappingTransformer, - driver string, - sourceConnectionType string, -) ([]*BenthosConfigResponse, error) { - responses := []*BenthosConfigResponse{} - - for _, tableMapping := range mappings { + for _, tableMapping := range groupedMappings { tableName := neosync_benthos.BuildBenthosTable(tableMapping.Schema, tableMapping.Table) var count = 0 tableOpt := sourceTableOpts[tableName] @@ -81,21 +74,21 @@ func buildBenthosGenerateSourceConfigResponses( count = tableOpt.Count } - tableColInfo, ok := groupedcolumnInfo[tableName] + tableColInfo, ok := groupedSchemas[tableName] if !ok { return nil, fmt.Errorf("missing table column info") } - tableColTransformers, ok := groupedColTransformers[tableName] + tableColTransformers, ok := colTransformerMap[tableName] if !ok { return nil, fmt.Errorf("missing table column transformers mapping") } - jsCode, err := extractJsFunctionsAndOutputs(ctx, transformerclient, tableMapping.Mappings) + jsCode, err := extractJsFunctionsAndOutputs(ctx, b.transformerclient, tableMapping.Mappings) if err != nil { return nil, err } - mutations, err := buildMutationConfigs(ctx, transformerclient, tableMapping.Mappings, tableColInfo, false) + mutations, err := buildMutationConfigs(ctx, b.transformerclient, tableMapping.Mappings, tableColInfo, false) if err != nil { return nil, err } @@ -146,12 +139,12 @@ func buildBenthosGenerateSourceConfigResponses( } columns := buildPlainColumns(tableMapping.Mappings) - columnDefaultProperties, err := getColumnDefaultProperties(slogger, driver, columns, tableColInfo, tableColTransformers) + columnDefaultProperties, err := getColumnDefaultProperties(logger, db.Driver, columns, tableColInfo, tableColTransformers) if err != nil { return nil, err } - responses = append(responses, &BenthosConfigResponse{ + configs = append(configs, &bb_internal.BenthosSourceConfig{ Name: neosync_benthos.BuildBenthosTable(tableMapping.Schema, tableMapping.Table), // todo: may need to expand on this Config: bc, DependsOn: []*tabledependency.DependsOn{}, @@ -163,8 +156,7 @@ func buildBenthosGenerateSourceConfigResponses( Processors: processors, - SourceConnectionType: sourceConnectionType, - metriclabels: metrics.MetricLabels{ + Metriclabels: metrics.MetricLabels{ metrics.NewEqLabel(metrics.TableSchemaLabel, tableMapping.Schema), metrics.NewEqLabel(metrics.TableNameLabel, tableMapping.Table), metrics.NewEqLabel(metrics.JobTypeLabel, "generate"), @@ -172,24 +164,22 @@ func buildBenthosGenerateSourceConfigResponses( }) } - 
return responses, nil + return configs, nil } -func (b *benthosBuilder) getSqlGenerateOutput( - driver string, - benthosConfig *BenthosConfigResponse, - destination *mgmtv1alpha1.JobDestination, - dsn string, -) []neosync_benthos.Outputs { - outputs := []neosync_benthos.Outputs{} - destOpts := getDestinationOptions(destination) +func (b *generateBuilder) BuildDestinationConfig(ctx context.Context, params *bb_internal.DestinationParams) (*bb_internal.BenthosDestinationConfig, error) { + config := &bb_internal.BenthosDestinationConfig{} + + benthosConfig := params.SourceConfig + destOpts := getDestinationOptions(params.DestinationOpts) processorConfigs := []neosync_benthos.ProcessorConfig{} for _, pc := range benthosConfig.Processors { processorConfigs = append(processorConfigs, *pc) } - outputs = append(outputs, neosync_benthos.Outputs{ + config.BenthosDsns = append(config.BenthosDsns, &bb_shared.BenthosDsn{EnvVarKey: params.DestEnvVarKey, ConnectionId: params.DestConnection.Id}) + config.Outputs = append(config.Outputs, neosync_benthos.Outputs{ Fallback: []neosync_benthos.Outputs{ { // retry processor and output several times @@ -200,8 +190,8 @@ func (b *benthosBuilder) getSqlGenerateOutput( Output: neosync_benthos.OutputConfig{ Outputs: neosync_benthos.Outputs{ PooledSqlInsert: &neosync_benthos.PooledSqlInsert{ - Driver: driver, - Dsn: dsn, + Driver: b.driver, + Dsn: params.DSN, Schema: benthosConfig.TableSchema, Table: benthosConfig.TableName, @@ -233,72 +223,28 @@ func (b *benthosBuilder) getSqlGenerateOutput( }, }) - return outputs + return config, nil } -func (b *benthosBuilder) getSqlAiGenerateOutput( - driver string, - benthosConfig *BenthosConfigResponse, - destination *mgmtv1alpha1.JobDestination, - dsn string, - aiGroupedTableCols map[string][]string, -) ([]neosync_benthos.Outputs, error) { - outputs := []neosync_benthos.Outputs{} - destOpts := getDestinationOptions(destination) - tableKey := neosync_benthos.BuildBenthosTable(benthosConfig.TableSchema, benthosConfig.TableName) - - cols, ok := aiGroupedTableCols[tableKey] - if !ok { - return nil, fmt.Errorf("unable to find table columns for key (%s) when building destination connection", tableKey) - } +type generateSourceTableOptions struct { + Count int +} - processorConfigs := []neosync_benthos.ProcessorConfig{} - for _, pc := range benthosConfig.Processors { - processorConfigs = append(processorConfigs, *pc) +func groupGenerateSourceOptionsByTable( + schemaOptions []*mgmtv1alpha1.GenerateSourceSchemaOption, +) map[string]*generateSourceTableOptions { + groupedMappings := map[string]*generateSourceTableOptions{} + + for idx := range schemaOptions { + schemaOpt := schemaOptions[idx] + for tidx := range schemaOpt.Tables { + tableOpt := schemaOpt.Tables[tidx] + key := neosync_benthos.BuildBenthosTable(schemaOpt.Schema, tableOpt.Table) + groupedMappings[key] = &generateSourceTableOptions{ + Count: int(tableOpt.RowCount), // todo: probably need to update rowcount int64 to int32 + } + } } - outputs = append(outputs, neosync_benthos.Outputs{ - Fallback: []neosync_benthos.Outputs{ - { - // retry processor and output several times - Retry: &neosync_benthos.RetryConfig{ - InlineRetryConfig: neosync_benthos.InlineRetryConfig{ - MaxRetries: 10, - }, - Output: neosync_benthos.OutputConfig{ - Outputs: neosync_benthos.Outputs{ - PooledSqlInsert: &neosync_benthos.PooledSqlInsert{ - Driver: driver, - Dsn: dsn, - - Schema: benthosConfig.TableSchema, - Table: benthosConfig.TableName, - Columns: cols, - OnConflictDoNothing: 
destOpts.OnConflictDoNothing, - TruncateOnRetry: destOpts.Truncate, - - ArgsMapping: buildPlainInsertArgs(cols), - - Batching: &neosync_benthos.Batching{ - Period: "5s", - Count: 100, - }, - }, - }, - Processors: processorConfigs, - }, - }, - }, - // kills activity depending on error - {Error: &neosync_benthos.ErrorOutputConfig{ - ErrorMsg: `${! meta("fallback_error")}`, - Batching: &neosync_benthos.Batching{ - Period: "5s", - Count: 100, - }, - }}, - }, - }) - - return outputs, nil + return groupedMappings } diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-mongo.go b/internal/benthos/benthos-builder/builders/mongodb.go similarity index 50% rename from worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-mongo.go rename to internal/benthos/benthos-builder/builders/mongodb.go index f62f526b9e..5bfbf6814b 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync-mongo.go +++ b/internal/benthos/benthos-builder/builders/mongodb.go @@ -1,40 +1,37 @@ -package genbenthosconfigs_activity +package benthosbuilder_builders import ( "context" "fmt" - "log/slog" - mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" "github.com/nucleuscloud/neosync/backend/pkg/metrics" sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" ) -type mongoSyncResp struct { - BenthosConfigs []*BenthosConfigResponse +type mongodbSyncBuilder struct { + transformerclient mgmtv1alpha1connect.TransformersServiceClient } -func (b *benthosBuilder) getMongoDbSyncBenthosConfigResponses( - ctx context.Context, - job *mgmtv1alpha1.Job, - slogger *slog.Logger, -) (*mongoSyncResp, error) { - sourceConnection, err := shared.GetJobSourceConnection(ctx, job.GetSource(), b.connclient) - if err != nil { - return nil, fmt.Errorf("unable to get source connection by id: %w", err) +func NewMongoDbSyncBuilder( + transformerclient mgmtv1alpha1connect.TransformersServiceClient, +) bb_internal.BenthosBuilder { + return &mongodbSyncBuilder{ + transformerclient: transformerclient, } - sourceConnectionType := shared.GetConnectionType(sourceConnection) - slogger = slogger.With( - "sourceConnectionType", sourceConnectionType, - ) - _ = slogger +} +func (b *mongodbSyncBuilder) BuildSourceConfigs(ctx context.Context, params *bb_internal.SourceParams) ([]*bb_internal.BenthosSourceConfig, error) { + sourceConnection := params.SourceConnection + job := params.Job groupedMappings := groupMappingsByTable(job.GetMappings()) - benthosConfigs := []*BenthosConfigResponse{} + benthosConfigs := []*bb_internal.BenthosSourceConfig{} for _, tableMapping := range groupedMappings { bc := &neosync_benthos.BenthosConfig{ StreamConfig: neosync_benthos.StreamConfig{ @@ -53,7 +50,12 @@ func (b *benthosBuilder) getMongoDbSyncBenthosConfigResponses( Processors: []neosync_benthos.ProcessorConfig{}, }, Output: &neosync_benthos.OutputConfig{ - Outputs: neosync_benthos.Outputs{}, + Outputs: neosync_benthos.Outputs{ + Broker: &neosync_benthos.OutputBrokerConfig{ + Pattern: 
"fan_out", + Outputs: []neosync_benthos.Outputs{}, + }, + }, }, }, } @@ -68,10 +70,10 @@ func (b *benthosBuilder) getMongoDbSyncBenthosConfigResponses( ctx, b.transformerclient, tabledependency.NewRunConfig(tableMapping.Table, tabledependency.RunTypeInsert, []string{}, nil, columns, columns, nil, splitColumnPaths), - map[string][]*referenceKey{}, - map[string][]*referenceKey{}, - b.jobId, - b.runId, + map[string][]*bb_internal.ReferenceKey{}, + map[string][]*bb_internal.ReferenceKey{}, + params.Job.Id, + params.RunId, &shared.RedisConfig{}, tableMapping.Mappings, map[string]*sqlmanager_shared.ColumnInfo{}, @@ -85,7 +87,7 @@ func (b *benthosBuilder) getMongoDbSyncBenthosConfigResponses( bc.StreamConfig.Pipeline.Processors = append(bc.StreamConfig.Pipeline.Processors, *pc) } - benthosConfigs = append(benthosConfigs, &BenthosConfigResponse{ + benthosConfigs = append(benthosConfigs, &bb_internal.BenthosSourceConfig{ Config: bc, Name: fmt.Sprintf("%s.%s", tableMapping.Schema, tableMapping.Table), // todo TableSchema: tableMapping.Schema, @@ -93,10 +95,9 @@ func (b *benthosBuilder) getMongoDbSyncBenthosConfigResponses( RunType: tabledependency.RunTypeInsert, DependsOn: []*tabledependency.DependsOn{}, Columns: columns, - BenthosDsns: []*shared.BenthosDsn{{ConnectionId: sourceConnection.GetId(), EnvVarKey: "SOURCE_CONNECTION_DSN"}}, + BenthosDsns: []*bb_shared.BenthosDsn{{ConnectionId: sourceConnection.GetId(), EnvVarKey: "SOURCE_CONNECTION_DSN"}}, - SourceConnectionType: sourceConnectionType, - metriclabels: metrics.MetricLabels{ + Metriclabels: metrics.MetricLabels{ metrics.NewEqLabel(metrics.TableSchemaLabel, tableMapping.Schema), metrics.NewEqLabel(metrics.TableNameLabel, tableMapping.Table), metrics.NewEqLabel(metrics.JobTypeLabel, "sync"), @@ -104,7 +105,33 @@ func (b *benthosBuilder) getMongoDbSyncBenthosConfigResponses( }) } - return &mongoSyncResp{ - BenthosConfigs: benthosConfigs, - }, nil + return benthosConfigs, nil +} + +func (b *mongodbSyncBuilder) BuildDestinationConfig(ctx context.Context, params *bb_internal.DestinationParams) (*bb_internal.BenthosDestinationConfig, error) { + config := &bb_internal.BenthosDestinationConfig{} + + benthosConfig := params.SourceConfig + config.BenthosDsns = append(config.BenthosDsns, &bb_shared.BenthosDsn{EnvVarKey: params.DestEnvVarKey, ConnectionId: params.DestConnection.GetId()}) + config.Outputs = append(config.Outputs, neosync_benthos.Outputs{PooledMongoDB: &neosync_benthos.OutputMongoDb{ + Url: params.DSN, + + Database: benthosConfig.TableSchema, + Collection: benthosConfig.TableName, + Operation: "update-one", + Upsert: true, + DocumentMap: ` + root = { + "$set": this + } + `, + FilterMap: ` + root._id = this._id + `, + WriteConcern: &neosync_benthos.MongoWriteConcern{ + W: "1", + }, + }, + }) + return config, nil } diff --git a/internal/benthos/benthos-builder/builders/neosync-connection-data.go b/internal/benthos/benthos-builder/builders/neosync-connection-data.go new file mode 100644 index 0000000000..28cf0b3a3e --- /dev/null +++ b/internal/benthos/benthos-builder/builders/neosync-connection-data.go @@ -0,0 +1,102 @@ +package benthosbuilder_builders + +import ( + "context" + "errors" + "fmt" + + mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" + "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager" + sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + tabledependency 
"github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" + neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" +) + +type neosyncConnectionDataBuilder struct { + connectiondataclient mgmtv1alpha1connect.ConnectionDataServiceClient + sqlmanagerclient sqlmanager.SqlManagerClient + sourceJobRunId *string + syncConfigs []*tabledependency.RunConfig + destinationConnection *mgmtv1alpha1.Connection + sourceConnectionType bb_internal.ConnectionType +} + +func NewNeosyncConnectionDataSyncBuilder( + connectiondataclient mgmtv1alpha1connect.ConnectionDataServiceClient, + sqlmanagerclient sqlmanager.SqlManagerClient, + sourceJobRunId *string, + syncConfigs []*tabledependency.RunConfig, + destinationConnection *mgmtv1alpha1.Connection, + sourceConnectionType bb_internal.ConnectionType, +) bb_internal.BenthosBuilder { + return &neosyncConnectionDataBuilder{ + connectiondataclient: connectiondataclient, + sqlmanagerclient: sqlmanagerclient, + sourceJobRunId: sourceJobRunId, + syncConfigs: syncConfigs, + destinationConnection: destinationConnection, + sourceConnectionType: sourceConnectionType, + } +} + +func (b *neosyncConnectionDataBuilder) BuildSourceConfigs(ctx context.Context, params *bb_internal.SourceParams) ([]*bb_internal.BenthosSourceConfig, error) { + sourceConnection := params.SourceConnection + job := params.Job + configs := []*bb_internal.BenthosSourceConfig{} + + for _, config := range b.syncConfigs { + schema, table := sqlmanager_shared.SplitTableKey(config.Table()) + + bc := &neosync_benthos.BenthosConfig{ + StreamConfig: neosync_benthos.StreamConfig{ + Logger: &neosync_benthos.LoggerConfig{ + Level: "ERROR", + AddTimestamp: true, + }, + Input: &neosync_benthos.InputConfig{ + Inputs: neosync_benthos.Inputs{ + NeosyncConnectionData: &neosync_benthos.NeosyncConnectionData{ + ConnectionId: sourceConnection.GetId(), + ConnectionType: string(b.sourceConnectionType), + JobId: &job.Id, + JobRunId: b.sourceJobRunId, + Schema: schema, + Table: table, + }, + }, + }, + Pipeline: &neosync_benthos.PipelineConfig{}, + Output: &neosync_benthos.OutputConfig{ + Outputs: neosync_benthos.Outputs{ + Broker: &neosync_benthos.OutputBrokerConfig{ + Pattern: "fan_out", + Outputs: []neosync_benthos.Outputs{}, + }, + }, + }, + }, + } + configs = append(configs, &bb_internal.BenthosSourceConfig{ + Name: fmt.Sprintf("%s.%s", config.Table(), config.RunType()), + Config: bc, + DependsOn: config.DependsOn(), + RunType: config.RunType(), + + BenthosDsns: []*bb_shared.BenthosDsn{{ConnectionId: sourceConnection.Id, EnvVarKey: "SOURCE_CONNECTION_DSN"}}, + + TableSchema: schema, + TableName: table, + Columns: config.InsertColumns(), + PrimaryKeys: config.PrimaryKeys(), + }) + } + + return configs, nil +} + +func (b *neosyncConnectionDataBuilder) BuildDestinationConfig(ctx context.Context, params *bb_internal.DestinationParams) (*bb_internal.BenthosDestinationConfig, error) { + return nil, errors.ErrUnsupported +} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/processors.go b/internal/benthos/benthos-builder/builders/processors.go similarity index 92% rename from worker/pkg/workflows/datasync/activities/gen-benthos-configs/processors.go rename to internal/benthos/benthos-builder/builders/processors.go index f1561c612b..99cce08925 100644 --- 
a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/processors.go +++ b/internal/benthos/benthos-builder/builders/processors.go @@ -1,4 +1,4 @@ -package genbenthosconfigs_activity +package benthosbuilder_builders import ( "context" @@ -15,6 +15,7 @@ import ( "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" "github.com/nucleuscloud/neosync/worker/pkg/benthos/transformers" transformer_utils "github.com/nucleuscloud/neosync/worker/pkg/benthos/transformers/utils" @@ -22,11 +23,58 @@ import ( "google.golang.org/protobuf/encoding/protojson" ) +func buildProcessorConfigsByRunType( + ctx context.Context, + transformerclient mgmtv1alpha1connect.TransformersServiceClient, + config *tabledependency.RunConfig, + columnForeignKeysMap map[string][]*bb_internal.ReferenceKey, + transformedFktoPkMap map[string][]*bb_internal.ReferenceKey, + jobId, runId string, + redisConfig *shared.RedisConfig, + mappings []*mgmtv1alpha1.JobMapping, + columnInfoMap map[string]*sqlmanager_shared.ColumnInfo, + jobSourceOptions *mgmtv1alpha1.JobSourceOptions, + mappedKeys []string, +) ([]*neosync_benthos.ProcessorConfig, error) { + if config.RunType() == tabledependency.RunTypeUpdate { + // sql update processor configs + processorConfigs, err := buildSqlUpdateProcessorConfigs(config, redisConfig, jobId, runId, transformedFktoPkMap) + if err != nil { + return nil, err + } + return processorConfigs, nil + } else { + // sql insert processor configs + fkSourceCols := []string{} + for col := range columnForeignKeysMap { + fkSourceCols = append(fkSourceCols, col) + } + processorConfigs, err := buildProcessorConfigs( + ctx, + transformerclient, + mappings, + columnInfoMap, + transformedFktoPkMap, + fkSourceCols, + jobId, + runId, + redisConfig, + config, + jobSourceOptions, + mappedKeys, + ) + if err != nil { + return nil, err + } + return processorConfigs, nil + } +} + func buildSqlUpdateProcessorConfigs( config *tabledependency.RunConfig, redisConfig *shared.RedisConfig, jobId, runId string, - transformedFktoPkMap map[string][]*referenceKey, + transformedFktoPkMap map[string][]*bb_internal.ReferenceKey, ) ([]*neosync_benthos.ProcessorConfig, error) { processorConfigs := []*neosync_benthos.ProcessorConfig{} for fkCol, pks := range transformedFktoPkMap { @@ -76,7 +124,7 @@ func buildProcessorConfigs( transformerclient mgmtv1alpha1connect.TransformersServiceClient, cols []*mgmtv1alpha1.JobMapping, tableColumnInfo map[string]*sqlmanager_shared.ColumnInfo, - transformedFktoPkMap map[string][]*referenceKey, + transformedFktoPkMap map[string][]*bb_internal.ReferenceKey, fkSourceCols []string, jobId, runId string, redisConfig *shared.RedisConfig, @@ -279,7 +327,7 @@ func generateSha1Hash(input string) string { func buildBranchCacheConfigs( cols []*mgmtv1alpha1.JobMapping, - transformedFktoPkMap map[string][]*referenceKey, + transformedFktoPkMap map[string][]*bb_internal.ReferenceKey, jobId, runId string, redisConfig *shared.RedisConfig, ) ([]*neosync_benthos.BranchConfig, error) { @@ -709,3 +757,25 @@ func computeMutationFunction(col *mgmtv1alpha1.JobMapping, colInfo *sqlmanager_s return "", fmt.Errorf("unsupported transformer: %T", cfg) } } + +func shouldProcessColumn(t 
*mgmtv1alpha1.JobMappingTransformer) bool { + switch t.GetConfig().GetConfig().(type) { + case *mgmtv1alpha1.TransformerConfig_PassthroughConfig, + nil: + return false + default: + return true + } +} + +func shouldProcessStrict(t *mgmtv1alpha1.JobMappingTransformer) bool { + switch t.GetConfig().GetConfig().(type) { + case *mgmtv1alpha1.TransformerConfig_PassthroughConfig, + *mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig, + *mgmtv1alpha1.TransformerConfig_Nullconfig, + nil: + return false + default: + return true + } +} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/processors_test.go b/internal/benthos/benthos-builder/builders/processors_test.go similarity index 80% rename from worker/pkg/workflows/datasync/activities/gen-benthos-configs/processors_test.go rename to internal/benthos/benthos-builder/builders/processors_test.go index d52b1ccd78..9a4fa0f4cd 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/processors_test.go +++ b/internal/benthos/benthos-builder/builders/processors_test.go @@ -1,4 +1,4 @@ -package genbenthosconfigs_activity +package benthosbuilder_builders import ( "context" @@ -10,6 +10,7 @@ import ( "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" "github.com/stretchr/testify/require" ) @@ -65,7 +66,7 @@ func Test_buildProcessorConfigsJavascript(t *testing.T) { Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT.Config}, }}, map[string]*sqlmanager_shared.ColumnInfo{}, - map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, + map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, tabledependency.NewRunConfig("", tabledependency.RunTypeInsert, nil, nil, nil, []string{"address"}, nil, false), nil, []string{}, @@ -124,7 +125,7 @@ func Test_buildProcessorConfigsGenerateJavascript(t *testing.T) { Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT.Config}, }}, map[string]*sqlmanager_shared.ColumnInfo{}, - map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, + map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, tabledependency.NewRunConfig("", tabledependency.RunTypeInsert, nil, nil, nil, []string{"test"}, nil, false), nil, []string{}, @@ -192,7 +193,7 @@ func Test_buildProcessorConfigsJavascriptMultiple(t *testing.T) { []*mgmtv1alpha1.JobMapping{ {Schema: "public", Table: "users", Column: nameCol, Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT.Config}}, {Schema: "public", Table: "users", Column: ageCol, Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT2.Config}}}, - map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, + map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, tabledependency.NewRunConfig("", tabledependency.RunTypeInsert, nil, nil, nil, []string{nameCol, ageCol}, nil, false), nil, []string{}, @@ -261,7 +262,7 @@ func Test_buildProcessorConfigsTransformAndGenerateJavascript(t *testing.T) { []*mgmtv1alpha1.JobMapping{ {Schema: "public", Table: "users", Column: nameCol, Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT.Config}}, {Schema: "public", Table: 
"users", Column: col2, Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT2.Config}}}, - map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, + map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, tabledependency.NewRunConfig("", tabledependency.RunTypeInsert, nil, nil, nil, []string{nameCol, col2}, nil, false), nil, []string{}, @@ -321,7 +322,7 @@ func Test_buildProcessorConfigsJavascript_DeepKeys(t *testing.T) { Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT.Config}, }}, map[string]*sqlmanager_shared.ColumnInfo{}, - map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, + map[string][]*bb_internal.ReferenceKey{}, []string{}, mockJobId, mockRunId, nil, tabledependency.NewRunConfig("", tabledependency.RunTypeInsert, nil, nil, nil, []string{"foo.bar.baz"}, nil, false), nil, []string{}, @@ -357,3 +358,65 @@ const neosync = { require.NotNil(t, outputMap) require.Equal(t, "hello world", outputMap["foo.bar.baz"]) } + +func Test_shouldProcessColumn(t *testing.T) { + t.Run("no - passthrough", func(t *testing.T) { + actual := shouldProcessColumn(&mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, + }, + }) + require.False(t, actual) + }) + t.Run("no - nil", func(t *testing.T) { + actual := shouldProcessColumn(nil) + require.False(t, actual) + }) + t.Run("yes", func(t *testing.T) { + actual := shouldProcessColumn(&mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateBoolConfig{}, + }, + }) + require.True(t, actual) + }) +} + +func Test_shouldProcessStrict(t *testing.T) { + t.Run("no - passthrough", func(t *testing.T) { + actual := shouldProcessStrict(&mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, + }, + }) + require.False(t, actual) + }) + t.Run("no - default", func(t *testing.T) { + actual := shouldProcessStrict(&mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig{}, + }, + }) + require.False(t, actual) + }) + t.Run("no - null", func(t *testing.T) { + actual := shouldProcessStrict(&mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{}, + }, + }) + require.False(t, actual) + }) + t.Run("no - nil", func(t *testing.T) { + actual := shouldProcessStrict(nil) + require.False(t, actual) + }) + t.Run("yes", func(t *testing.T) { + actual := shouldProcessStrict(&mgmtv1alpha1.JobMappingTransformer{ + Config: &mgmtv1alpha1.TransformerConfig{ + Config: &mgmtv1alpha1.TransformerConfig_GenerateBoolConfig{}, + }, + }) + require.True(t, actual) + }) +} diff --git a/internal/benthos/benthos-builder/builders/sql-util.go b/internal/benthos/benthos-builder/builders/sql-util.go new file mode 100644 index 0000000000..cb71faf318 --- /dev/null +++ b/internal/benthos/benthos-builder/builders/sql-util.go @@ -0,0 +1,577 @@ +package benthosbuilder_builders + +import ( + "fmt" + "log/slog" + "slices" + "strings" + + mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager" + sqlmanager_shared 
"github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" +) + +const ( + jobmappingSubsetErrMsg = "job mappings are not equal to or a subset of the database schema found in the source connection" + haltOnSchemaAdditionErrMsg = "job mappings does not contain a column mapping for all " + + "columns found in the source connection for the selected schemas and tables" +) + +type sqlJobSourceOpts struct { + HaltOnNewColumnAddition bool + SubsetByForeignKeyConstraints bool + SchemaOpt []*schemaOptions +} +type schemaOptions struct { + Schema string + Tables []*tableOptions +} +type tableOptions struct { + Table string + WhereClause *string +} + +type sqlSourceTableOptions struct { + WhereClause *string +} + +type tableMapping struct { + Schema string + Table string + Mappings []*mgmtv1alpha1.JobMapping +} + +func areMappingsSubsetOfSchemas( + groupedSchemas map[string]map[string]*sqlmanager_shared.ColumnInfo, + mappings []*mgmtv1alpha1.JobMapping, +) bool { + tableColMappings := getUniqueColMappingsMap(mappings) + + for key := range groupedSchemas { + // For this method, we only care about the schemas+tables that we currently have mappings for + if _, ok := tableColMappings[key]; !ok { + delete(groupedSchemas, key) + } + } + + if len(tableColMappings) != len(groupedSchemas) { + return false + } + + // tests to make sure that every column in the col mappings is present in the db schema + for table, cols := range tableColMappings { + schemaCols, ok := groupedSchemas[table] + if !ok { + return false + } + // job mappings has more columns than the schema + if len(cols) > len(schemaCols) { + return false + } + for col := range cols { + if _, ok := schemaCols[col]; !ok { + return false + } + } + } + return true +} + +func getUniqueColMappingsMap( + mappings []*mgmtv1alpha1.JobMapping, +) map[string]map[string]struct{} { + tableColMappings := map[string]map[string]struct{}{} + for _, mapping := range mappings { + key := neosync_benthos.BuildBenthosTable(mapping.Schema, mapping.Table) + if _, ok := tableColMappings[key]; ok { + tableColMappings[key][mapping.Column] = struct{}{} + } else { + tableColMappings[key] = map[string]struct{}{ + mapping.Column: {}, + } + } + } + return tableColMappings +} + +func shouldHaltOnSchemaAddition( + groupedSchemas map[string]map[string]*sqlmanager_shared.ColumnInfo, + mappings []*mgmtv1alpha1.JobMapping, +) bool { + tableColMappings := getUniqueColMappingsMap(mappings) + + if len(tableColMappings) != len(groupedSchemas) { + return true + } + + for table, cols := range groupedSchemas { + mappingCols, ok := tableColMappings[table] + if !ok { + return true + } + if len(cols) > len(mappingCols) { + return true + } + for col := range cols { + if _, ok := mappingCols[col]; !ok { + return true + } + } + } + return false +} + +func getMapValuesCount[K comparable, V any](m map[K][]V) int { + count := 0 + for _, v := range m { + count += len(v) + } + return count +} + +func buildPlainInsertArgs(cols []string) string { + if len(cols) == 0 { + return "" + } + pieces := make([]string, len(cols)) + for idx := range cols { + pieces[idx] = fmt.Sprintf("this.%q", cols[idx]) + } + return fmt.Sprintf("root = [%s]", strings.Join(pieces, ", ")) +} + +func buildPlainColumns(mappings []*mgmtv1alpha1.JobMapping) []string { + columns := 
make([]string, len(mappings)) + for idx := range mappings { + columns[idx] = mappings[idx].Column + } + return columns +} + +func buildTableSubsetMap(tableOpts map[string]*sqlSourceTableOptions, tableMap map[string]*tableMapping) map[string]string { + tableSubsetMap := map[string]string{} + for table, opts := range tableOpts { + if _, ok := tableMap[table]; !ok { + continue + } + if opts != nil && opts.WhereClause != nil && *opts.WhereClause != "" { + tableSubsetMap[table] = *opts.WhereClause + } + } + return tableSubsetMap +} + +func groupSqlJobSourceOptionsByTable( + sqlSourceOpts *sqlJobSourceOpts, +) map[string]*sqlSourceTableOptions { + groupedMappings := map[string]*sqlSourceTableOptions{} + for _, schemaOpt := range sqlSourceOpts.SchemaOpt { + for tidx := range schemaOpt.Tables { + tableOpt := schemaOpt.Tables[tidx] + key := neosync_benthos.BuildBenthosTable(schemaOpt.Schema, tableOpt.Table) + groupedMappings[key] = &sqlSourceTableOptions{ + WhereClause: tableOpt.WhereClause, + } + } + } + return groupedMappings +} + +func mergeVirtualForeignKeys( + dbForeignKeys map[string][]*sqlmanager_shared.ForeignConstraint, + virtualForeignKeys []*mgmtv1alpha1.VirtualForeignConstraint, + colInfoMap map[string]map[string]*sqlmanager_shared.ColumnInfo, +) (map[string][]*sqlmanager_shared.ForeignConstraint, error) { + fks := map[string][]*sqlmanager_shared.ForeignConstraint{} + + for table, fk := range dbForeignKeys { + fks[table] = fk + } + + for _, fk := range virtualForeignKeys { + tn := sqlmanager_shared.BuildTable(fk.Schema, fk.Table) + fkTable := sqlmanager_shared.BuildTable(fk.GetForeignKey().Schema, fk.GetForeignKey().Table) + notNullable := []bool{} + for _, c := range fk.GetColumns() { + colMap, ok := colInfoMap[tn] + if !ok { + return nil, fmt.Errorf("virtual foreign key source table not found: %s", tn) + } + colInfo, ok := colMap[c] + if !ok { + return nil, fmt.Errorf("virtual foreign key source column not found: %s.%s", tn, c) + } + notNullable = append(notNullable, !colInfo.IsNullable) + } + fks[tn] = append(fks[tn], &sqlmanager_shared.ForeignConstraint{ + Columns: fk.GetColumns(), + NotNullable: notNullable, + ForeignKey: &sqlmanager_shared.ForeignKey{ + Table: fkTable, + Columns: fk.GetForeignKey().GetColumns(), + }, + }) + } + + return fks, nil +} + +func groupMappingsByTable( + mappings []*mgmtv1alpha1.JobMapping, +) []*tableMapping { + groupedMappings := map[string][]*mgmtv1alpha1.JobMapping{} + + for _, mapping := range mappings { + key := neosync_benthos.BuildBenthosTable(mapping.Schema, mapping.Table) + groupedMappings[key] = append(groupedMappings[key], mapping) + } + + output := make([]*tableMapping, 0, len(groupedMappings)) + for key, mappings := range groupedMappings { + schema, table := sqlmanager_shared.SplitTableKey(key) + output = append(output, &tableMapping{ + Schema: schema, + Table: table, + Mappings: mappings, + }) + } + return output +} + +func getTableMappingsMap(groupedMappings []*tableMapping) map[string]*tableMapping { + groupedTableMapping := map[string]*tableMapping{} + for _, tm := range groupedMappings { + groupedTableMapping[neosync_benthos.BuildBenthosTable(tm.Schema, tm.Table)] = tm + } + return groupedTableMapping +} + +func getColumnTransformerMap(tableMappingMap map[string]*tableMapping) map[string]map[string]*mgmtv1alpha1.JobMappingTransformer { + colTransformerMap := map[string]map[string]*mgmtv1alpha1.JobMappingTransformer{} // schema.table -> column -> transformer + for table, mapping := range tableMappingMap { + colTransformerMap[table] = 
map[string]*mgmtv1alpha1.JobMappingTransformer{}
+		for _, m := range mapping.Mappings {
+			colTransformerMap[table][m.Column] = m.Transformer
+		}
+	}
+	return colTransformerMap
+}
+
+func getTableColMapFromMappings(mappings []*tableMapping) map[string][]string {
+	tableColMap := map[string][]string{}
+	for _, m := range mappings {
+		cols := []string{}
+		for _, c := range m.Mappings {
+			cols = append(cols, c.Column)
+		}
+		tn := sqlmanager_shared.BuildTable(m.Schema, m.Table)
+		tableColMap[tn] = cols
+	}
+	return tableColMap
+}
+
+func filterForeignKeysMap(
+	colTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer,
+	foreignKeysMap map[string][]*sqlmanager_shared.ForeignConstraint,
+) map[string][]*sqlmanager_shared.ForeignConstraint {
+	newFkMap := make(map[string][]*sqlmanager_shared.ForeignConstraint)
+
+	for table, fks := range foreignKeysMap {
+		cols, ok := colTransformerMap[table]
+		if !ok {
+			continue
+		}
+		for _, fk := range fks {
+			newFk := &sqlmanager_shared.ForeignConstraint{
+				ForeignKey: &sqlmanager_shared.ForeignKey{
+					Table: fk.ForeignKey.Table,
+				},
+			}
+			for i, c := range fk.Columns {
+				t, ok := cols[c]
+				if !fk.NotNullable[i] && (!ok || isNullJobMappingTransformer(t)) {
+					continue
+				}
+
+				newFk.Columns = append(newFk.Columns, c)
+				newFk.NotNullable = append(newFk.NotNullable, fk.NotNullable[i])
+				newFk.ForeignKey.Columns = append(newFk.ForeignKey.Columns, fk.ForeignKey.Columns[i])
+			}
+
+			if len(newFk.Columns) > 0 {
+				newFkMap[table] = append(newFkMap[table], newFk)
+			}
+		}
+	}
+	return newFkMap
+}
+
+func isNullJobMappingTransformer(t *mgmtv1alpha1.JobMappingTransformer) bool {
+	switch t.GetConfig().GetConfig().(type) {
+	case *mgmtv1alpha1.TransformerConfig_Nullconfig:
+		return true
+	default:
+		return false
+	}
+}
+
+func isDefaultJobMappingTransformer(t *mgmtv1alpha1.JobMappingTransformer) bool {
+	switch t.GetConfig().GetConfig().(type) {
+	case *mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig:
+		return true
+	default:
+		return false
+	}
+}
+
+// maps each table's primary key columns to the foreign key columns that reference them
+func getPrimaryKeyDependencyMap(tableDependencies map[string][]*sqlmanager_shared.ForeignConstraint) map[string]map[string][]*bb_internal.ReferenceKey {
+	tc := map[string]map[string][]*bb_internal.ReferenceKey{} // schema.table -> column -> ForeignKey
+	for table, constraints := range tableDependencies {
+		for _, c := range constraints {
+			_, ok := tc[c.ForeignKey.Table]
+			if !ok {
+				tc[c.ForeignKey.Table] = map[string][]*bb_internal.ReferenceKey{}
+			}
+			for idx, col := range c.ForeignKey.Columns {
+				tc[c.ForeignKey.Table][col] = append(tc[c.ForeignKey.Table][col], &bb_internal.ReferenceKey{
+					Table:  table,
+					Column: c.Columns[idx],
+				})
+			}
+		}
+	}
+	return tc
+}
+
+func findTopForeignKeySource(tableName, col string, tableDependencies map[string][]*sqlmanager_shared.ForeignConstraint) *bb_internal.ReferenceKey {
+	// Walk the foreign key constraints of the current table
+	if foreignKeys, ok := tableDependencies[tableName]; ok {
+		for _, fk := range foreignKeys {
+			for idx, c := range fk.Columns {
+				if c == col {
+					// Recursively follow the reference until the root source column is reached
+					return findTopForeignKeySource(fk.ForeignKey.Table, fk.ForeignKey.Columns[idx], tableDependencies)
+				}
+			}
+		}
+	}
+	return &bb_internal.ReferenceKey{
+		Table:  tableName,
+		Column: col,
+	}
+}
+
+// builds schema.table -> FK column -> top-level primary key schema.table.column,
+// resolving nested foreign keys down to their root source column
+func buildForeignKeySourceMap(tableDeps map[string][]*sqlmanager_shared.ForeignConstraint) map[string]map[string]*bb_internal.ReferenceKey {
+	outputMap := map[string]map[string]*bb_internal.ReferenceKey{}
+	for tableName, constraints := range tableDeps {
+		if _, ok := outputMap[tableName]; !ok {
+			outputMap[tableName] = map[string]*bb_internal.ReferenceKey{}
+		}
+		for _, con := range constraints {
+			for _, col := range con.Columns {
+				fk := findTopForeignKeySource(tableName, col, tableDeps)
+				outputMap[tableName][col] = fk
+			}
+		}
+	}
+	return outputMap
+}
+
+func getTransformedFksMap(
+	tabledependencies map[string][]*sqlmanager_shared.ForeignConstraint,
+	colTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer,
+) map[string]map[string][]*bb_internal.ReferenceKey {
+	foreignKeyToSourceMap := buildForeignKeySourceMap(tabledependencies)
+	// filter this list down to the table constraints that have a transformer
+	transformedForeignKeyToSourceMap := map[string]map[string][]*bb_internal.ReferenceKey{} // schema.table -> column -> foreignKey
+	for table, constraints := range foreignKeyToSourceMap {
+		_, ok := transformedForeignKeyToSourceMap[table]
+		if !ok {
+			transformedForeignKeyToSourceMap[table] = map[string][]*bb_internal.ReferenceKey{}
+		}
+		for col, tc := range constraints {
+			// only add the constraint if the foreign key column has a transformer
+			transformer, transformerOk := colTransformerMap[tc.Table][tc.Column]
+			if transformerOk && shouldProcessStrict(transformer) {
+				transformedForeignKeyToSourceMap[table][col] = append(transformedForeignKeyToSourceMap[table][col], tc)
+			}
+		}
+	}
+	return transformedForeignKeyToSourceMap
+}
+
+func getColumnDefaultProperties(
+	slogger *slog.Logger,
+	driver string,
+	cols []string,
+	colInfo map[string]*sqlmanager_shared.ColumnInfo,
+	colTransformers map[string]*mgmtv1alpha1.JobMappingTransformer,
+) (map[string]*neosync_benthos.ColumnDefaultProperties, error) {
+	colDefaults := map[string]*neosync_benthos.ColumnDefaultProperties{}
+	for _, cName := range cols {
+		info, ok := colInfo[cName]
+		if !ok {
+			return nil, fmt.Errorf("column default type missing.
column: %s", cName) + } + needsOverride, needsReset, err := sqlmanager.GetColumnOverrideAndResetProperties(driver, info) + if err != nil { + slogger.Error("unable to determine SQL column default flags", "error", err, "column", cName) + return nil, err + } + + jmTransformer, ok := colTransformers[cName] + if !ok { + return nil, fmt.Errorf("transformer missing for column: %s", cName) + } + var hasDefaultTransformer bool + if jmTransformer != nil && isDefaultJobMappingTransformer(jmTransformer) { + hasDefaultTransformer = true + } + if !needsReset && !needsOverride && !hasDefaultTransformer { + continue + } + colDefaults[cName] = &neosync_benthos.ColumnDefaultProperties{ + NeedsReset: needsReset, + NeedsOverride: needsOverride, + HasDefaultTransformer: hasDefaultTransformer, + } + } + return colDefaults, nil +} + +func buildRedisDependsOnMap(transformedForeignKeyToSourceMap map[string][]*bb_internal.ReferenceKey, runconfig *tabledependency.RunConfig) map[string][]string { + redisDependsOnMap := map[string][]string{} + for col, fks := range transformedForeignKeyToSourceMap { + if !slices.Contains(runconfig.InsertColumns(), col) { + continue + } + for _, fk := range fks { + if _, exists := redisDependsOnMap[fk.Table]; !exists { + redisDependsOnMap[fk.Table] = []string{} + } + redisDependsOnMap[fk.Table] = append(redisDependsOnMap[fk.Table], fk.Column) + } + } + if runconfig.RunType() == tabledependency.RunTypeUpdate && len(redisDependsOnMap) != 0 { + redisDependsOnMap[runconfig.Table()] = runconfig.PrimaryKeys() + } + return redisDependsOnMap +} + +func getSqlJobSourceOpts( + source *mgmtv1alpha1.JobSource, +) (*sqlJobSourceOpts, error) { + switch jobSourceConfig := source.GetOptions().GetConfig().(type) { + case *mgmtv1alpha1.JobSourceOptions_Postgres: + if jobSourceConfig.Postgres == nil { + return nil, nil + } + schemaOpt := []*schemaOptions{} + for _, opt := range jobSourceConfig.Postgres.Schemas { + tableOpts := []*tableOptions{} + for _, t := range opt.GetTables() { + tableOpts = append(tableOpts, &tableOptions{ + Table: t.Table, + WhereClause: t.WhereClause, + }) + } + schemaOpt = append(schemaOpt, &schemaOptions{ + Schema: opt.GetSchema(), + Tables: tableOpts, + }) + } + return &sqlJobSourceOpts{ + HaltOnNewColumnAddition: jobSourceConfig.Postgres.HaltOnNewColumnAddition, + SubsetByForeignKeyConstraints: jobSourceConfig.Postgres.SubsetByForeignKeyConstraints, + SchemaOpt: schemaOpt, + }, nil + case *mgmtv1alpha1.JobSourceOptions_Mysql: + if jobSourceConfig.Mysql == nil { + return nil, nil + } + schemaOpt := []*schemaOptions{} + for _, opt := range jobSourceConfig.Mysql.Schemas { + tableOpts := []*tableOptions{} + for _, t := range opt.GetTables() { + tableOpts = append(tableOpts, &tableOptions{ + Table: t.Table, + WhereClause: t.WhereClause, + }) + } + schemaOpt = append(schemaOpt, &schemaOptions{ + Schema: opt.GetSchema(), + Tables: tableOpts, + }) + } + return &sqlJobSourceOpts{ + HaltOnNewColumnAddition: jobSourceConfig.Mysql.HaltOnNewColumnAddition, + SubsetByForeignKeyConstraints: jobSourceConfig.Mysql.SubsetByForeignKeyConstraints, + SchemaOpt: schemaOpt, + }, nil + case *mgmtv1alpha1.JobSourceOptions_Mssql: + if jobSourceConfig.Mssql == nil { + return nil, nil + } + schemaOpt := []*schemaOptions{} + for _, opt := range jobSourceConfig.Mssql.Schemas { + tableOpts := []*tableOptions{} + for _, t := range opt.GetTables() { + tableOpts = append(tableOpts, &tableOptions{ + Table: t.Table, + WhereClause: t.WhereClause, + }) + } + schemaOpt = append(schemaOpt, &schemaOptions{ + Schema: 
opt.GetSchema(), + Tables: tableOpts, + }) + } + return &sqlJobSourceOpts{ + HaltOnNewColumnAddition: jobSourceConfig.Mssql.HaltOnNewColumnAddition, + SubsetByForeignKeyConstraints: jobSourceConfig.Mssql.SubsetByForeignKeyConstraints, + SchemaOpt: schemaOpt, + }, nil + default: + return nil, fmt.Errorf("unsupported job source options type for sql job source: %T", jobSourceConfig) + } +} + +type destinationOptions struct { + OnConflictDoNothing bool + Truncate bool + TruncateCascade bool + SkipForeignKeyViolations bool +} + +func getDestinationOptions(destOpts *mgmtv1alpha1.JobDestinationOptions) *destinationOptions { + if destOpts.Config == nil { + return &destinationOptions{} + } + switch config := destOpts.Config.(type) { + case *mgmtv1alpha1.JobDestinationOptions_PostgresOptions: + return &destinationOptions{ + OnConflictDoNothing: config.PostgresOptions.GetOnConflict().GetDoNothing(), + Truncate: config.PostgresOptions.GetTruncateTable().GetTruncateBeforeInsert(), + TruncateCascade: config.PostgresOptions.GetTruncateTable().GetCascade(), + SkipForeignKeyViolations: config.PostgresOptions.GetSkipForeignKeyViolations(), + } + case *mgmtv1alpha1.JobDestinationOptions_MysqlOptions: + return &destinationOptions{ + OnConflictDoNothing: config.MysqlOptions.GetOnConflict().GetDoNothing(), + Truncate: config.MysqlOptions.GetTruncateTable().GetTruncateBeforeInsert(), + SkipForeignKeyViolations: config.MysqlOptions.GetSkipForeignKeyViolations(), + } + case *mgmtv1alpha1.JobDestinationOptions_MssqlOptions: + return &destinationOptions{ + SkipForeignKeyViolations: config.MssqlOptions.GetSkipForeignKeyViolations(), + } + default: + return &destinationOptions{} + } +} diff --git a/internal/benthos/benthos-builder/builders/sql.go b/internal/benthos/benthos-builder/builders/sql.go new file mode 100644 index 0000000000..35ea52672d --- /dev/null +++ b/internal/benthos/benthos-builder/builders/sql.go @@ -0,0 +1,471 @@ +package benthosbuilder_builders + +import ( + "context" + "errors" + "fmt" + "log/slog" + + mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" + "github.com/nucleuscloud/neosync/backend/pkg/metrics" + "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager" + sqlmanager_mssql "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/mssql" + sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" + neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" + "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" +) + +type sqlSyncBuilder struct { + transformerclient mgmtv1alpha1connect.TransformersServiceClient + sqlmanagerclient sqlmanager.SqlManagerClient + redisConfig *shared.RedisConfig + driver string + selectQueryBuilder bb_shared.SelectQueryMapBuilder + + // reverse of table dependency + // map of foreign key to source table + column + primaryKeyToForeignKeysMap map[string]map[string][]*bb_internal.ReferenceKey // schema.table -> column -> ForeignKey + colTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer // schema.table -> column -> transformer + sqlSourceSchemaColumnInfoMap map[string]map[string]*sqlmanager_shared.ColumnInfo 
// schema.table -> column -> column info struct + sqlDestinationSchemaColumnInfoMap map[string]map[string]*sqlmanager_shared.ColumnInfo // schema.table -> column -> column info struct +} + +func NewSqlSyncBuilder( + transformerclient mgmtv1alpha1connect.TransformersServiceClient, + sqlmanagerclient sqlmanager.SqlManagerClient, + redisConfig *shared.RedisConfig, + databaseDriver string, + selectQueryBuilder bb_shared.SelectQueryMapBuilder, +) bb_internal.BenthosBuilder { + return &sqlSyncBuilder{ + transformerclient: transformerclient, + sqlmanagerclient: sqlmanagerclient, + redisConfig: redisConfig, + driver: databaseDriver, + selectQueryBuilder: selectQueryBuilder, + } +} + +func (b *sqlSyncBuilder) BuildSourceConfigs(ctx context.Context, params *bb_internal.SourceParams) ([]*bb_internal.BenthosSourceConfig, error) { + sourceConnection := params.SourceConnection + job := params.Job + logger := params.Logger + + sqlSourceOpts, err := getSqlJobSourceOpts(job.Source) + if err != nil { + return nil, err + } + var sourceTableOpts map[string]*sqlSourceTableOptions + if sqlSourceOpts != nil { + sourceTableOpts = groupSqlJobSourceOptionsByTable(sqlSourceOpts) + } + + db, err := b.sqlmanagerclient.NewPooledSqlDb(ctx, logger, sourceConnection) + if err != nil { + return nil, fmt.Errorf("unable to create new sql db: %w", err) + } + defer db.Db.Close() + + groupedColumnInfo, err := db.Db.GetSchemaColumnMap(ctx) + if err != nil { + return nil, fmt.Errorf("unable to get database schema for connection: %w", err) + } + b.sqlSourceSchemaColumnInfoMap = groupedColumnInfo + if !areMappingsSubsetOfSchemas(groupedColumnInfo, job.Mappings) { + return nil, errors.New(jobmappingSubsetErrMsg) + } + if sqlSourceOpts != nil && sqlSourceOpts.HaltOnNewColumnAddition && + shouldHaltOnSchemaAddition(groupedColumnInfo, job.Mappings) { + return nil, errors.New(haltOnSchemaAdditionErrMsg) + } + uniqueSchemas := shared.GetUniqueSchemasFromMappings(job.Mappings) + + tableConstraints, err := db.Db.GetTableConstraintsBySchema(ctx, uniqueSchemas) + if err != nil { + return nil, fmt.Errorf("unable to retrieve database table constraints: %w", err) + } + + foreignKeysMap, err := mergeVirtualForeignKeys(tableConstraints.ForeignKeyConstraints, job.GetVirtualForeignKeys(), groupedColumnInfo) + if err != nil { + return nil, err + } + + logger.Info(fmt.Sprintf("found %d foreign key constraints for database", getMapValuesCount(tableConstraints.ForeignKeyConstraints))) + logger.Info(fmt.Sprintf("found %d primary key constraints for database", getMapValuesCount(tableConstraints.PrimaryKeyConstraints))) + + groupedMappings := groupMappingsByTable(job.Mappings) + groupedTableMapping := getTableMappingsMap(groupedMappings) + colTransformerMap := getColumnTransformerMap(groupedTableMapping) // schema.table -> column -> transformer + b.colTransformerMap = colTransformerMap + filteredForeignKeysMap := filterForeignKeysMap(colTransformerMap, foreignKeysMap) + + tableSubsetMap := buildTableSubsetMap(sourceTableOpts, groupedTableMapping) + tableColMap := getTableColMapFromMappings(groupedMappings) + runConfigs, err := tabledependency.GetRunConfigs(filteredForeignKeysMap, tableSubsetMap, tableConstraints.PrimaryKeyConstraints, tableColMap) + if err != nil { + return nil, err + } + primaryKeyToForeignKeysMap := getPrimaryKeyDependencyMap(filteredForeignKeysMap) + b.primaryKeyToForeignKeysMap = primaryKeyToForeignKeysMap + + tableRunTypeQueryMap, err := b.selectQueryBuilder.BuildSelectQueryMap(db.Driver, filteredForeignKeysMap, runConfigs, 
sqlSourceOpts.SubsetByForeignKeyConstraints, groupedColumnInfo) + if err != nil { + return nil, fmt.Errorf("unable to build select queries: %w", err) + } + + configs, err := buildBenthosSqlSourceConfigResponses(logger, ctx, b.transformerclient, groupedTableMapping, runConfigs, sourceConnection.Id, db.Driver, tableRunTypeQueryMap, groupedColumnInfo, filteredForeignKeysMap, colTransformerMap, job.Id, params.RunId, b.redisConfig, primaryKeyToForeignKeysMap) + if err != nil { + return nil, fmt.Errorf("unable to build benthos sql source config responses: %w", err) + } + + return configs, nil +} + +func buildBenthosSqlSourceConfigResponses( + slogger *slog.Logger, + ctx context.Context, + transformerclient mgmtv1alpha1connect.TransformersServiceClient, + groupedTableMapping map[string]*tableMapping, + runconfigs []*tabledependency.RunConfig, + dsnConnectionId string, + driver string, + tableRunTypeQueryMap map[string]map[tabledependency.RunType]string, + groupedColumnInfo map[string]map[string]*sqlmanager_shared.ColumnInfo, + tableDependencies map[string][]*sqlmanager_shared.ForeignConstraint, + colTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer, + jobId, runId string, + redisConfig *shared.RedisConfig, + primaryKeyToForeignKeysMap map[string]map[string][]*bb_internal.ReferenceKey, +) ([]*bb_internal.BenthosSourceConfig, error) { + configs := []*bb_internal.BenthosSourceConfig{} + + // map of table constraints that have transformers + transformedForeignKeyToSourceMap := getTransformedFksMap(tableDependencies, colTransformerMap) + + for _, config := range runconfigs { + mappings, ok := groupedTableMapping[config.Table()] + if !ok { + return nil, fmt.Errorf("missing column mappings for table: %s", config.Table()) + } + query, ok := tableRunTypeQueryMap[config.Table()][config.RunType()] + if !ok { + return nil, fmt.Errorf("select query not found for table: %s runType: %s", config.Table(), config.RunType()) + } + bc := &neosync_benthos.BenthosConfig{ + StreamConfig: neosync_benthos.StreamConfig{ + Input: &neosync_benthos.InputConfig{ + Inputs: neosync_benthos.Inputs{ + PooledSqlRaw: &neosync_benthos.InputPooledSqlRaw{ + Driver: driver, + Dsn: "${SOURCE_CONNECTION_DSN}", + + Query: query, + }, + }, + }, + Pipeline: &neosync_benthos.PipelineConfig{ + Threads: -1, + Processors: []neosync_benthos.ProcessorConfig{}, + }, + Output: &neosync_benthos.OutputConfig{ + Outputs: neosync_benthos.Outputs{ + Broker: &neosync_benthos.OutputBrokerConfig{ + Pattern: "fan_out", + Outputs: []neosync_benthos.Outputs{}, + }, + }, + }, + }, + } + + columnForeignKeysMap := primaryKeyToForeignKeysMap[config.Table()] + transformedFktoPkMap := transformedForeignKeyToSourceMap[config.Table()] + colInfoMap := groupedColumnInfo[config.Table()] + + slogger.Debug("building processors") + processorConfigs, err := buildProcessorConfigsByRunType( + ctx, + transformerclient, + config, + columnForeignKeysMap, + transformedFktoPkMap, + jobId, + runId, + redisConfig, + mappings.Mappings, + colInfoMap, + nil, + []string{}, + ) + if err != nil { + return nil, err + } + for _, pc := range processorConfigs { + bc.StreamConfig.Pipeline.Processors = append(bc.StreamConfig.Pipeline.Processors, *pc) + } + + configs = append(configs, &bb_internal.BenthosSourceConfig{ + Name: fmt.Sprintf("%s.%s", config.Table(), config.RunType()), + Config: bc, + DependsOn: config.DependsOn(), + RedisDependsOn: buildRedisDependsOnMap(transformedFktoPkMap, config), + RunType: config.RunType(), + + BenthosDsns: 
[]*bb_shared.BenthosDsn{{ConnectionId: dsnConnectionId, EnvVarKey: "SOURCE_CONNECTION_DSN"}},
+
+			TableSchema: mappings.Schema,
+			TableName:   mappings.Table,
+			Columns:     config.InsertColumns(),
+			PrimaryKeys: config.PrimaryKeys(),
+
+			Metriclabels: metrics.MetricLabels{
+				metrics.NewEqLabel(metrics.TableSchemaLabel, mappings.Schema),
+				metrics.NewEqLabel(metrics.TableNameLabel, mappings.Table),
+				metrics.NewEqLabel(metrics.JobTypeLabel, "sync"),
+			},
+		})
+	}
+	return configs, nil
+}
+
+func (b *sqlSyncBuilder) BuildDestinationConfig(ctx context.Context, params *bb_internal.DestinationParams) (*bb_internal.BenthosDestinationConfig, error) {
+	logger := params.Logger
+	benthosConfig := params.SourceConfig
+	tableKey := neosync_benthos.BuildBenthosTable(benthosConfig.TableSchema, benthosConfig.TableName)
+
+	config := &bb_internal.BenthosDestinationConfig{}
+
+	// lazy load
+	if len(b.sqlDestinationSchemaColumnInfoMap) == 0 {
+		sqlSchemaColMap := getSqlSchemaColumnMap(ctx, params.DestConnection, b.sqlSourceSchemaColumnInfoMap, b.sqlmanagerclient, params.Logger)
+		b.sqlDestinationSchemaColumnInfoMap = sqlSchemaColMap
+	}
+
+	var colInfoMap map[string]*sqlmanager_shared.ColumnInfo
+	colMap, ok := b.sqlDestinationSchemaColumnInfoMap[tableKey]
+	if ok {
+		colInfoMap = colMap
+	}
+
+	colTransformerMap := b.colTransformerMap
+	// lazy load
+	if len(colTransformerMap) == 0 {
+		groupedMappings := groupMappingsByTable(params.Job.Mappings)
+		groupedTableMapping := getTableMappingsMap(groupedMappings)
+		colTMap := getColumnTransformerMap(groupedTableMapping) // schema.table -> column -> transformer
+		b.colTransformerMap = colTMap
+		colTransformerMap = colTMap
+	}
+
+	tableColTransformers := colTransformerMap[tableKey]
+	columnDefaultProperties, err := getColumnDefaultProperties(logger, b.driver, benthosConfig.Columns, colInfoMap, tableColTransformers)
+	if err != nil {
+		return nil, err
+	}
+	params.SourceConfig.ColumnDefaultProperties = columnDefaultProperties
+
+	destOpts := params.DestinationOpts
+	config.BenthosDsns = append(config.BenthosDsns, &bb_shared.BenthosDsn{EnvVarKey: params.DestEnvVarKey, ConnectionId: params.DestConnection.Id})
+	if benthosConfig.RunType == tabledependency.RunTypeUpdate {
+		// copy the columns before appending the primary keys so the append cannot
+		// clobber the backing array shared with benthosConfig.Columns
+		args := make([]string, 0, len(benthosConfig.Columns)+len(benthosConfig.PrimaryKeys))
+		args = append(args, benthosConfig.Columns...)
+		args = append(args, benthosConfig.PrimaryKeys...)
+		config.Outputs = append(config.Outputs, neosync_benthos.Outputs{
+			Fallback: []neosync_benthos.Outputs{
+				{
+					PooledSqlUpdate: &neosync_benthos.PooledSqlUpdate{
+						Driver: b.driver,
+						Dsn:    params.DSN,
+
+						Schema:                   benthosConfig.TableSchema,
+						Table:                    benthosConfig.TableName,
+						Columns:                  benthosConfig.Columns,
+						SkipForeignKeyViolations: destOpts.GetPostgresOptions().GetSkipForeignKeyViolations(),
+						WhereColumns:             benthosConfig.PrimaryKeys,
+						ArgsMapping:              buildPlainInsertArgs(args),
+
+						Batching: &neosync_benthos.Batching{
+							Period: "5s",
+							Count:  100,
+						},
+					},
+				},
+				// kills activity depending on error
+				{Error: &neosync_benthos.ErrorOutputConfig{
+					ErrorMsg: `${! meta("fallback_error")}`,
+					Batching: &neosync_benthos.Batching{
+						Period: "5s",
+						Count:  100,
+					},
+				}},
+			},
+		})
+	} else {
+		// adds redis hash output for transformed primary keys
+		constraints := b.primaryKeyToForeignKeysMap[tableKey]
+		for col := range constraints {
+			transformer := b.colTransformerMap[tableKey][col]
+			if shouldProcessStrict(transformer) {
+				if b.redisConfig == nil {
+					return nil, fmt.Errorf("missing redis config.
this operation requires redis") + } + hashedKey := neosync_benthos.HashBenthosCacheKey(params.Job.GetId(), params.RunId, tableKey, col) + config.Outputs = append(config.Outputs, neosync_benthos.Outputs{ + RedisHashOutput: &neosync_benthos.RedisHashOutputConfig{ + Url: b.redisConfig.Url, + Key: hashedKey, + FieldsMapping: fmt.Sprintf(`root = {meta(%q): json(%q)}`, hashPrimaryKeyMetaKey(benthosConfig.TableSchema, benthosConfig.TableName, col), col), // map of original value to transformed value + WalkMetadata: false, + WalkJsonObject: false, + Kind: &b.redisConfig.Kind, + Master: b.redisConfig.Master, + Tls: shared.BuildBenthosRedisTlsConfig(b.redisConfig), + }, + }) + benthosConfig.RedisConfig = append(benthosConfig.RedisConfig, &bb_shared.BenthosRedisConfig{ + Key: hashedKey, + Table: tableKey, + Column: col, + }) + } + } + + columnTypes := []string{} + for _, c := range benthosConfig.Columns { + colType, ok := colInfoMap[c] + if ok { + columnTypes = append(columnTypes, colType.DataType) + } else { + columnTypes = append(columnTypes, "") + } + } + + prefix, suffix := getInsertPrefixAndSuffix(b.driver, benthosConfig.TableSchema, benthosConfig.TableName, columnDefaultProperties) + config.Outputs = append(config.Outputs, neosync_benthos.Outputs{ + Fallback: []neosync_benthos.Outputs{ + { + PooledSqlInsert: &neosync_benthos.PooledSqlInsert{ + Driver: b.driver, + Dsn: params.DSN, + + Schema: benthosConfig.TableSchema, + Table: benthosConfig.TableName, + Columns: benthosConfig.Columns, + ColumnsDataTypes: columnTypes, + ColumnDefaultProperties: columnDefaultProperties, + OnConflictDoNothing: destOpts.GetPostgresOptions().GetOnConflict().GetDoNothing(), + SkipForeignKeyViolations: destOpts.GetPostgresOptions().GetSkipForeignKeyViolations(), + TruncateOnRetry: destOpts.GetPostgresOptions().GetTruncateTable().GetTruncateBeforeInsert(), + ArgsMapping: buildPlainInsertArgs(benthosConfig.Columns), + Prefix: prefix, + Suffix: suffix, + + Batching: &neosync_benthos.Batching{ + Period: "5s", + Count: 100, + }, + }, + }, + // kills activity depending on error + {Error: &neosync_benthos.ErrorOutputConfig{ + ErrorMsg: `${! 
meta("fallback_error")}`, + Batching: &neosync_benthos.Batching{ + Period: "5s", + Count: 100, + }, + }}, + }, + }) + } + + return config, nil +} + +func getInsertPrefixAndSuffix( + driver, schema, table string, + columnDefaultProperties map[string]*neosync_benthos.ColumnDefaultProperties, +) (prefix, suffix *string) { + var pre, suff *string + if len(columnDefaultProperties) == 0 { + return pre, suff + } + switch driver { + case sqlmanager_shared.MssqlDriver: + if hasPassthroughIdentityColumn(columnDefaultProperties) { + enableIdentityInsert := true + p := sqlmanager_mssql.BuildMssqlSetIdentityInsertStatement(schema, table, enableIdentityInsert) + pre = &p + s := sqlmanager_mssql.BuildMssqlSetIdentityInsertStatement(schema, table, !enableIdentityInsert) + suff = &s + } + return pre, suff + default: + return pre, suff + } +} + +func hasPassthroughIdentityColumn(columnDefaultProperties map[string]*neosync_benthos.ColumnDefaultProperties) bool { + for _, d := range columnDefaultProperties { + if d.NeedsOverride && d.NeedsReset && !d.HasDefaultTransformer { + return true + } + } + return false +} + +// tries to get destination schema column info map +// if not uses source destination schema column info map +func getSqlSchemaColumnMap( + ctx context.Context, + destinationConnection *mgmtv1alpha1.Connection, + sourceSchemaColumnInfoMap map[string]map[string]*sqlmanager_shared.ColumnInfo, + sqlmanagerclient sqlmanager.SqlManagerClient, + slogger *slog.Logger, +) map[string]map[string]*sqlmanager_shared.ColumnInfo { + schemaColMap := sourceSchemaColumnInfoMap + switch destinationConnection.ConnectionConfig.Config.(type) { + case *mgmtv1alpha1.ConnectionConfig_PgConfig, *mgmtv1alpha1.ConnectionConfig_MysqlConfig, *mgmtv1alpha1.ConnectionConfig_MssqlConfig: + destDb, err := sqlmanagerclient.NewPooledSqlDb(ctx, slogger, destinationConnection) + if err != nil { + destDb.Db.Close() + return schemaColMap + } + destColMap, err := destDb.Db.GetSchemaColumnMap(ctx) + if err != nil { + destDb.Db.Close() + return schemaColMap + } + if len(destColMap) != 0 { + return mergeSourceDestinationColumnInfo(sourceSchemaColumnInfoMap, destColMap) + } + destDb.Db.Close() + } + return schemaColMap +} + +// Merges source db column info with destination db col info +// Destination db col info take precedence +func mergeSourceDestinationColumnInfo( + sourceCols map[string]map[string]*sqlmanager_shared.ColumnInfo, + destCols map[string]map[string]*sqlmanager_shared.ColumnInfo, +) map[string]map[string]*sqlmanager_shared.ColumnInfo { + mergedCols := map[string]map[string]*sqlmanager_shared.ColumnInfo{} + + for schemaTable, tableCols := range sourceCols { + mergedCols[schemaTable] = tableCols + } + + for schemaTable, tableCols := range destCols { + if _, ok := mergedCols[schemaTable]; !ok { + mergedCols[schemaTable] = make(map[string]*sqlmanager_shared.ColumnInfo) + } + for colName, colInfo := range tableCols { + mergedCols[schemaTable][colName] = colInfo + } + } + + return mergedCols +} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync_test.go b/internal/benthos/benthos-builder/builders/sql_test.go similarity index 59% rename from worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync_test.go rename to internal/benthos/benthos-builder/builders/sql_test.go index 2c40dcb471..23f8a3e952 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync_test.go +++ b/internal/benthos/benthos-builder/builders/sql_test.go @@ -1,14 +1,10 @@ -package genbenthosconfigs_activity 
+package benthosbuilder_builders import ( "testing" mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" - sqlmanager_mssql "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/mssql" - sqlmanager_postgres "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/postgres" sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" - tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" - neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" "github.com/stretchr/testify/require" ) @@ -115,90 +111,6 @@ func TestFilterForeignKeysMap(t *testing.T) { } } -func Test_BuildPgPostTableSyncStatement(t *testing.T) { - t.Run("Update run type", func(t *testing.T) { - bcUpdate := &BenthosConfigResponse{ - RunType: tabledependency.RunTypeUpdate, - } - resultUpdate := buildPgPostTableSyncStatement(bcUpdate) - require.Empty(t, resultUpdate, "Expected empty slice for Update run type") - }) - - t.Run("No columns need reset", func(t *testing.T) { - bcNoReset := &BenthosConfigResponse{ - RunType: tabledependency.RunTypeInsert, - ColumnDefaultProperties: map[string]*neosync_benthos.ColumnDefaultProperties{ - "col1": {NeedsReset: false, HasDefaultTransformer: false}, - "col2": {NeedsReset: false, HasDefaultTransformer: true}, - }, - TableSchema: "public", - TableName: "test_table", - } - resultNoReset := buildPgPostTableSyncStatement(bcNoReset) - require.Empty(t, resultNoReset, "Expected empty slice when no columns need reset") - }) - - t.Run("Some columns need reset", func(t *testing.T) { - bcSomeReset := &BenthosConfigResponse{ - RunType: tabledependency.RunTypeInsert, - ColumnDefaultProperties: map[string]*neosync_benthos.ColumnDefaultProperties{ - "col1": {NeedsReset: true, HasDefaultTransformer: false}, - "col2": {NeedsReset: false, HasDefaultTransformer: true}, - "col3": {NeedsReset: true, HasDefaultTransformer: false}, - }, - TableSchema: "public", - TableName: "test_table", - } - resultSomeReset := buildPgPostTableSyncStatement(bcSomeReset) - expectedSomeReset := []string{ - sqlmanager_postgres.BuildPgIdentityColumnResetCurrentSql("public", "test_table", "col1"), - sqlmanager_postgres.BuildPgIdentityColumnResetCurrentSql("public", "test_table", "col3"), - } - require.ElementsMatch(t, expectedSomeReset, resultSomeReset, "Unexpected result when some columns need reset") - }) -} - -func Test_BuildMssqlPostTableSyncStatement(t *testing.T) { - t.Run("Update run type", func(t *testing.T) { - bcUpdate := &BenthosConfigResponse{ - RunType: tabledependency.RunTypeUpdate, - } - resultUpdate := buildMssqlPostTableSyncStatement(bcUpdate) - require.Empty(t, resultUpdate, "Expected empty slice for Update run type") - }) - - t.Run("No columns need override", func(t *testing.T) { - bcNoOverride := &BenthosConfigResponse{ - RunType: tabledependency.RunTypeInsert, - ColumnDefaultProperties: map[string]*neosync_benthos.ColumnDefaultProperties{ - "col1": {NeedsOverride: false}, - "col2": {NeedsOverride: false}, - }, - TableSchema: "dbo", - TableName: "test_table", - } - resultNoOverride := buildMssqlPostTableSyncStatement(bcNoOverride) - require.Empty(t, resultNoOverride, "Expected empty slice when no columns need override") - }) - - t.Run("Some columns need override", func(t *testing.T) { - bcSomeOverride := &BenthosConfigResponse{ - RunType: tabledependency.RunTypeInsert, - ColumnDefaultProperties: map[string]*neosync_benthos.ColumnDefaultProperties{ - "col1": {NeedsOverride: true}, - "col2": {NeedsOverride: false}, - }, - 
TableSchema: "dbo", - TableName: "test_table", - } - resultSomeOverride := buildMssqlPostTableSyncStatement(bcSomeOverride) - expectedSomeOverride := []string{ - sqlmanager_mssql.BuildMssqlIdentityColumnResetCurrent("dbo", "test_table"), - } - require.Equal(t, expectedSomeOverride, resultSomeOverride, "Unexpected result when some columns need override") - }) -} - func Test_isNullJobMappingTransformer(t *testing.T) { t.Run("yes", func(t *testing.T) { actual := isNullJobMappingTransformer(&mgmtv1alpha1.JobMappingTransformer{ diff --git a/internal/benthos/benthos-builder/generate-benthos.go b/internal/benthos/benthos-builder/generate-benthos.go new file mode 100644 index 0000000000..0b63381bea --- /dev/null +++ b/internal/benthos/benthos-builder/generate-benthos.go @@ -0,0 +1,150 @@ +package benthosbuilder + +import ( + "context" + "fmt" + + mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + "github.com/nucleuscloud/neosync/backend/pkg/metrics" + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal" + bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" + + neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" +) + +func (b *BenthosConfigManager) GenerateBenthosConfigs( + ctx context.Context, +) ([]*BenthosConfigResponse, error) { + dbBuilder, err := b.sourceProvider.GetBuilder(b.job, b.sourceConnection) + if err != nil { + return nil, fmt.Errorf("unable to create benthos builder: %w", err) + } + b.logger.Debug("created source benthos builder") + + sourceParams := &bb_internal.SourceParams{ + Job: b.job, + RunId: b.runId, + SourceConnection: b.sourceConnection, + Logger: b.logger, + } + + sourceConfigs, err := dbBuilder.BuildSourceConfigs(ctx, sourceParams) + if err != nil { + return nil, err + } + b.logger.Debug(fmt.Sprintf("built %d source configs", len(sourceConfigs))) + + destinationOpts := buildDestinationOptionsMap(b.job.GetDestinations()) + + b.logger.Debug(fmt.Sprintf("building %d destination configs", len(b.destinationConnections))) + responses := []*BenthosConfigResponse{} + for destIdx, destConnection := range b.destinationConnections { + destBuilder, err := b.destinationProvider.GetBuilder(b.job, destConnection) + if err != nil { + return nil, fmt.Errorf("unable to create destination builder: %w", err) + } + b.logger.Debug("created destination benthos builder for destination") + + destOpts, ok := destinationOpts[destConnection.GetId()] + if !ok { + return nil, fmt.Errorf("unable to find destination options for connection: %s", destConnection.GetId()) + } + + for _, sourceConfig := range sourceConfigs { + dstEnvVarKey := fmt.Sprintf("DESTINATION_%d_CONNECTION_DSN", destIdx) + dsn := fmt.Sprintf("${%s}", dstEnvVarKey) + destParams := &bb_internal.DestinationParams{ + SourceConfig: sourceConfig, + Job: b.job, + RunId: b.runId, + DestinationOpts: destOpts, + DestConnection: destConnection, + DestEnvVarKey: dstEnvVarKey, + DSN: dsn, + Logger: b.logger, + } + + destConfig, err := destBuilder.BuildDestinationConfig(ctx, destParams) + if err != nil { + return nil, err + } + sourceConfig.Config.Output.Broker.Outputs = append(sourceConfig.Config.Output.Broker.Outputs, destConfig.Outputs...) + sourceConfig.BenthosDsns = append(sourceConfig.BenthosDsns, destConfig.BenthosDsns...) 
+ } + b.logger.Debug(fmt.Sprintf("applied destination to %d source configs", len(sourceConfigs))) + } + + if b.metricsEnabled { + b.logger.Debug("metrics enabled. applying metric labels") + labels := metrics.MetricLabels{ + metrics.NewEqLabel(metrics.AccountIdLabel, b.job.AccountId), + metrics.NewEqLabel(metrics.JobIdLabel, b.job.Id), + metrics.NewEqLabel(metrics.NeosyncDateLabel, bb_shared.WithEnvInterpolation(metrics.NeosyncDateEnvKey)), + } + for key, val := range b.metricLabelKeyVals { + labels = append(labels, metrics.NewEqLabel(key, val)) + } + for _, resp := range sourceConfigs { + joinedLabels := append(labels, resp.Metriclabels...) //nolint:gocritic + resp.Config.Metrics = &neosync_benthos.Metrics{ + OtelCollector: &neosync_benthos.MetricsOtelCollector{}, + Mapping: joinedLabels.ToBenthosMeta(), + } + } + } + + var outputConfigs []*bb_internal.BenthosSourceConfig + if isOnlyBucketDestinations(b.job.Destinations) { + for _, sc := range sourceConfigs { + if sc.RunType == tabledependency.RunTypeInsert { + sc.DependsOn = []*tabledependency.DependsOn{} + outputConfigs = append(outputConfigs, sc) + } + } + } else { + outputConfigs = sourceConfigs + } + + for _, config := range outputConfigs { + response := convertToResponse(config) + responses = append(responses, response) + } + + b.logger.Info(fmt.Sprintf("successfully built %d benthos configs", len(responses))) + return responses, nil +} + +// builds map of destination id -> destination options +func buildDestinationOptionsMap(jobDests []*mgmtv1alpha1.JobDestination) map[string]*mgmtv1alpha1.JobDestinationOptions { + destOpts := map[string]*mgmtv1alpha1.JobDestinationOptions{} + for _, dest := range jobDests { + destOpts[dest.GetConnectionId()] = dest.GetOptions() + } + return destOpts +} + +func convertToResponse(sourceConfig *bb_internal.BenthosSourceConfig) *BenthosConfigResponse { + return &BenthosConfigResponse{ + Name: sourceConfig.Name, + Config: sourceConfig.Config, + DependsOn: sourceConfig.DependsOn, + TableSchema: sourceConfig.TableSchema, + TableName: sourceConfig.TableName, + Columns: sourceConfig.Columns, + RunType: sourceConfig.RunType, + ColumnDefaultProperties: sourceConfig.ColumnDefaultProperties, + RedisDependsOn: sourceConfig.RedisDependsOn, + BenthosDsns: sourceConfig.BenthosDsns, + RedisConfig: sourceConfig.RedisConfig, + } +} + +func isOnlyBucketDestinations(destinations []*mgmtv1alpha1.JobDestination) bool { + for _, dest := range destinations { + if dest.GetOptions().GetAwsS3Options() == nil && dest.GetOptions().GetGcpCloudstorageOptions() == nil { + return false + } + } + return true +} diff --git a/internal/benthos/benthos-builder/internal/types.go b/internal/benthos/benthos-builder/internal/types.go new file mode 100644 index 0000000000..4ede73e901 --- /dev/null +++ b/internal/benthos/benthos-builder/internal/types.go @@ -0,0 +1,158 @@ +package benthosbuilder_internal + +import ( + "context" + "fmt" + "log/slog" + + mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" + "github.com/nucleuscloud/neosync/backend/pkg/metrics" + sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" + bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" + neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" +) + +// ConnectionType represents supported connection types +type ConnectionType string + +const ( + ConnectionTypePostgres ConnectionType 
= "postgres" + ConnectionTypeMysql ConnectionType = "mysql" + ConnectionTypeMssql ConnectionType = "mssql" + ConnectionTypeAwsS3 ConnectionType = "aws-s3" + ConnectionTypeGCP ConnectionType = "gcp-cloud-storage" + ConnectionTypeMongo ConnectionType = "mongodb" + ConnectionTypeDynamodb ConnectionType = "aws-dynamodb" + ConnectionTypeLocalDir ConnectionType = "local-directory" + ConnectionTypeOpenAI ConnectionType = "openai" + ConnectionTypeNeosyncData ConnectionType = "neosync-data-stream" +) + +// Determines type of connection from Connection +func GetConnectionType(connection *mgmtv1alpha1.Connection) ConnectionType { + switch connection.GetConnectionConfig().GetConfig().(type) { + case *mgmtv1alpha1.ConnectionConfig_PgConfig: + return ConnectionTypePostgres + case *mgmtv1alpha1.ConnectionConfig_MysqlConfig: + return ConnectionTypeMysql + case *mgmtv1alpha1.ConnectionConfig_MssqlConfig: + return ConnectionTypeMssql + case *mgmtv1alpha1.ConnectionConfig_AwsS3Config: + return ConnectionTypeAwsS3 + case *mgmtv1alpha1.ConnectionConfig_GcpCloudstorageConfig: + return ConnectionTypeGCP + case *mgmtv1alpha1.ConnectionConfig_MongoConfig: + return ConnectionTypeMongo + case *mgmtv1alpha1.ConnectionConfig_DynamodbConfig: + return ConnectionTypeDynamodb + case *mgmtv1alpha1.ConnectionConfig_LocalDirConfig: + return ConnectionTypeLocalDir + case *mgmtv1alpha1.ConnectionConfig_OpenaiConfig: + return ConnectionTypeOpenAI + default: + return "unknown" + } +} + +// Determines SQL driver from connection type +func GetSqlDriverByConnectionType(connectionType ConnectionType) (string, error) { + switch connectionType { + case ConnectionTypePostgres: + return sqlmanager_shared.PostgresDriver, nil + case ConnectionTypeMysql: + return sqlmanager_shared.MysqlDriver, nil + case ConnectionTypeMssql: + return sqlmanager_shared.MssqlDriver, nil + default: + return "", fmt.Errorf("unsupported SQL connection type: %s", connectionType) + } +} + +// JobType represents the type of job +type JobType string + +const ( + JobTypeSync JobType = "sync" + JobTypeGenerate JobType = "generate" + JobTypeAIGenerate JobType = "ai-generate" +) + +// Determines type of job from Job +func GetJobType(job *mgmtv1alpha1.Job) JobType { + switch job.GetSource().GetOptions().GetConfig().(type) { + case *mgmtv1alpha1.JobSourceOptions_Postgres, + *mgmtv1alpha1.JobSourceOptions_Mysql, + *mgmtv1alpha1.JobSourceOptions_Mssql, + *mgmtv1alpha1.JobSourceOptions_Mongodb, + *mgmtv1alpha1.JobSourceOptions_Dynamodb, + *mgmtv1alpha1.JobSourceOptions_AwsS3: + return JobTypeSync + case *mgmtv1alpha1.JobSourceOptions_Generate: + return JobTypeGenerate + case *mgmtv1alpha1.JobSourceOptions_AiGenerate: + return JobTypeAIGenerate + default: + return "" + } +} + +// Handles both source (input) and destination (output) configurations for different +// connection types (postgres, mysql...) and job types (e.g., sync, generate...). +type BenthosBuilder interface { + // BuildSourceConfigs generates Benthos source configurations for reading and processing data. + // Returns a config for each schema.table in job mappings + BuildSourceConfigs(ctx context.Context, params *SourceParams) ([]*BenthosSourceConfig, error) + // BuildDestinationConfig creates a Benthos destination configuration for writing processed data. 
+ // Returns single config for a schema.table configuration + BuildDestinationConfig(ctx context.Context, params *DestinationParams) (*BenthosDestinationConfig, error) +} + +// SourceParams contains all parameters needed to build a source benthos configuration +type SourceParams struct { + Job *mgmtv1alpha1.Job + RunId string + SourceConnection *mgmtv1alpha1.Connection + Logger *slog.Logger +} + +type ReferenceKey struct { + Table string + Column string +} + +// DestinationParams contains all parameters needed to build a destination benthos configuration +type DestinationParams struct { + SourceConfig *BenthosSourceConfig + Job *mgmtv1alpha1.Job + RunId string + DestinationOpts *mgmtv1alpha1.JobDestinationOptions + DestConnection *mgmtv1alpha1.Connection + DestEnvVarKey string + DSN string + Logger *slog.Logger +} + +// BenthosSourceConfig represents a Benthos source configuration +type BenthosSourceConfig struct { + Config *neosync_benthos.BenthosConfig + Name string + DependsOn []*tabledependency.DependsOn + RunType tabledependency.RunType + TableSchema string + TableName string + Columns []string + RedisDependsOn map[string][]string + ColumnDefaultProperties map[string]*neosync_benthos.ColumnDefaultProperties + Processors []*neosync_benthos.ProcessorConfig + BenthosDsns []*bb_shared.BenthosDsn + RedisConfig []*bb_shared.BenthosRedisConfig + PrimaryKeys []string + Metriclabels metrics.MetricLabels +} + +// BenthosDestinationConfig represents a Benthos destination configuration +type BenthosDestinationConfig struct { + Outputs []neosync_benthos.Outputs + BenthosDsns []*bb_shared.BenthosDsn +} diff --git a/internal/benthos/benthos-builder/shared/types.go b/internal/benthos/benthos-builder/shared/types.go new file mode 100644 index 0000000000..534e78a893 --- /dev/null +++ b/internal/benthos/benthos-builder/shared/types.go @@ -0,0 +1,37 @@ +package benthosbuilder_shared + +import ( + "fmt" + + sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" +) + +// Holds the environment variable name and the connection id that should replace it at runtime when the Sync activity is launched +type BenthosDsn struct { + EnvVarKey string + // Neosync Connection Id + ConnectionId string +} + +// Keeps track of redis keys for clean up after syncing a table +type BenthosRedisConfig struct { + Key string + Table string // schema.table + Column string +} + +// querybuilder wrapper to avoid cgo in the cli +type SelectQueryMapBuilder interface { + BuildSelectQueryMap( + driver string, + tableFkConstraints map[string][]*sqlmanager_shared.ForeignConstraint, + runConfigs []*tabledependency.RunConfig, + subsetByForeignKeyConstraints bool, + groupedColumnInfo map[string]map[string]*sqlmanager_shared.ColumnInfo, + ) (map[string]map[tabledependency.RunType]string, error) +} + +func WithEnvInterpolation(input string) string { + return fmt.Sprintf("${%s}", input) +} diff --git a/internal/testutil/testcontainers/redis/redis.go b/internal/testutil/testcontainers/redis/redis.go new file mode 100644 index 0000000000..1d7a61945a --- /dev/null +++ b/internal/testutil/testcontainers/redis/redis.go @@ -0,0 +1,58 @@ +package testcontainers_redis + +import ( + "context" + + "github.com/testcontainers/testcontainers-go/modules/redis" + testredis "github.com/testcontainers/testcontainers-go/modules/redis" +) + +// Holds the Redis test container and connection string. 
+type RedisTestContainer struct {
+	URL           string
+	TestContainer *testredis.RedisContainer
+}
+
+// Option is a functional option for configuring the Redis Test Container
+type Option func(*RedisTestContainer)
+
+// NewRedisTestContainer initializes a new Redis Test Container with functional options
+func NewRedisTestContainer(ctx context.Context, opts ...Option) (*RedisTestContainer, error) {
+	r := &RedisTestContainer{}
+	for _, opt := range opts {
+		opt(r)
+	}
+	return r.Setup(ctx)
+}
+
+// Creates and starts a Redis test container, populating the receiver so that
+// any previously applied options are preserved
+func (r *RedisTestContainer) Setup(ctx context.Context) (*RedisTestContainer, error) {
+	redisContainer, err := redis.Run(
+		ctx,
+		"docker.io/redis:7",
+		redis.WithSnapshotting(10, 1),
+		redis.WithLogLevel(redis.LogLevelVerbose),
+	)
+	if err != nil {
+		return nil, err
+	}
+	redisUrl, err := redisContainer.ConnectionString(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	r.URL = redisUrl
+	r.TestContainer = redisContainer
+	return r, nil
+}
+
+// Terminates the container.
+func (r *RedisTestContainer) TearDown(ctx context.Context) error {
+	if r.TestContainer != nil {
+		if err := r.TestContainer.Terminate(ctx); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/internal/testutil/testdata/mysql/humanresources/create-tables.sql b/internal/testutil/testdata/mysql/humanresources/create-tables.sql
index 39a32b25bb..da82ae4c15 100644
--- a/internal/testutil/testdata/mysql/humanresources/create-tables.sql
+++ b/internal/testutil/testdata/mysql/humanresources/create-tables.sql
@@ -223,3 +223,32 @@ INSERT INTO departments(department_id,department_name,location_id) VALUES (11,'A
 -- INSERT INTO dependents(dependent_id,first_name,last_name,relationship,employee_id) VALUES (28,'Woody','Russell','Child',145);
 -- INSERT INTO dependents(dependent_id,first_name,last_name,relationship,employee_id) VALUES (29,'Alec','Partners','Child',146);
 -- INSERT INTO dependents(dependent_id,first_name,last_name,relationship,employee_id) VALUES (30,'Sandra','Taylor','Child',176);
+
+
+-- table with generated columns
+CREATE TABLE generated_table (
+    -- Auto Incremented column
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    price DECIMAL(10,2) NOT NULL,
+    quantity INT NOT NULL,
+    discount_percent DECIMAL(5,2) DEFAULT 0,
+
+    -- Virtual generated column
+    total_value DECIMAL(12,2) AS (price * quantity) VIRTUAL,
+
+    -- Stored generated column
+    discounted_price DECIMAL(10,2) AS (price * (1 - discount_percent/100)) STORED
+);
+
+
+INSERT INTO generated_table (price, quantity, discount_percent) VALUES
+    (99.99, 5, 10.00),
+    (499.99, 1, 0),
+    (19.99, 20, 15.00),
+    (299.99, 2, 25.00),
+    (9.99, 100, 5.00),
+    (149.99, 3, 12.50),
+    (799.99, 1, 30.00),
+    (49.99, 10, 8.00),
+    (1299.99, 1, 20.00),
+    (29.99, 15, 0);
diff --git a/internal/testutil/testdata/postgres/humanresources/create-tables.sql b/internal/testutil/testdata/postgres/humanresources/create-tables.sql
index 5e9d8b3d58..56276c8ca0 100644
--- a/internal/testutil/testdata/postgres/humanresources/create-tables.sql
+++ b/internal/testutil/testdata/postgres/humanresources/create-tables.sql
@@ -223,3 +223,23 @@ INSERT INTO dependents(dependent_id,first_name,last_name,relationship,employee_i
 INSERT INTO dependents(dependent_id,first_name,last_name,relationship,employee_id) VALUES (28,'Woody','Russell','Child',145);
 INSERT INTO dependents(dependent_id,first_name,last_name,relationship,employee_id) VALUES (29,'Alec','Partners','Child',146);
 INSERT INTO dependents(dependent_id,first_name,last_name,relationship,employee_id) VALUES
(30,'Sandra','Taylor','Child',176); + +-- Table with generated columns +CREATE TABLE generated_table ( + -- Identity column + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + + amount DECIMAL(10,2) NOT NULL, + status text NOT NULL DEFAULT 'pending', + + -- Generated stored column + amount_with_tax DECIMAL(10,2) GENERATED ALWAYS AS (amount * 1.2) STORED +); + + +INSERT INTO generated_table (amount, status) +VALUES + (199.99, 'pending'), + (299.50, 'processing'), + (499.99, 'completed'), + (150.00, 'cancelled'); diff --git a/worker/pkg/benthos/config.go b/worker/pkg/benthos/config.go index 99388eeaf4..29c528f079 100644 --- a/worker/pkg/benthos/config.go +++ b/worker/pkg/benthos/config.go @@ -71,8 +71,6 @@ type Inputs struct { } type NeosyncConnectionData struct { - ApiKey *string `json:"api_key,omitempty" yaml:"api_key,omitempty"` - ApiUrl string `json:"api_url" yaml:"api_url"` ConnectionId string `json:"connection_id" yaml:"connection_id"` ConnectionType string `json:"connection_type" yaml:"connection_type"` JobId *string `json:"job_id,omitempty" yaml:"job_id,omitempty"` diff --git a/worker/pkg/benthos/neosync_connection_data/neosync_connection_data_input.go b/worker/pkg/benthos/neosync_connection_data/neosync_connection_data_input.go index 207337c84a..f613a95b12 100644 --- a/worker/pkg/benthos/neosync_connection_data/neosync_connection_data_input.go +++ b/worker/pkg/benthos/neosync_connection_data/neosync_connection_data_input.go @@ -15,8 +15,6 @@ import ( var neosyncConnectionDataConfigSpec = service.NewConfigSpec(). Summary("Streams Neosync connection data"). - Field(service.NewStringField("api_key").Optional()). - Field(service.NewStringField("api_url")). Field(service.NewStringField("connection_id")). Field(service.NewStringField("connection_type")). Field(service.NewStringField("schema")). 
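The hardcoded connection-type strings in the hunk below change to match the ConnectionType constants introduced in internal/benthos/benthos-builder/internal/types.go ("aws-s3", "gcp-cloud-storage"). A minimal sketch of how a caller can stay in sync with those constants instead of hand-writing the values; the helper function and package name are illustrative, not part of the patch:

package example

import (
	mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1"
	bb_internal "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/internal"
	neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos"
)

// buildConnectionDataInput derives connection_type from the shared constants
// (e.g. ConnectionTypeAwsS3 == "aws-s3") rather than from ad hoc strings like
// the old "awsS3"/"gcpCloudStorage" values replaced in the hunk below.
func buildConnectionDataInput(conn *mgmtv1alpha1.Connection) neosync_benthos.NeosyncConnectionData {
	return neosync_benthos.NeosyncConnectionData{
		ConnectionId:   conn.GetId(),
		ConnectionType: string(bb_internal.GetConnectionType(conn)),
	}
}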
@@ -107,7 +105,7 @@ type neosyncInput struct { func (g *neosyncInput) Connect(ctx context.Context) error { var streamCfg *mgmtv1alpha1.ConnectionStreamConfig - if g.connectionType == "awsS3" { + if g.connectionType == "aws-s3" { awsS3Cfg := &mgmtv1alpha1.AwsS3StreamConfig{} if g.connectionOpts != nil { if g.connectionOpts.jobRunId != nil && *g.connectionOpts.jobRunId != "" { @@ -121,7 +119,7 @@ func (g *neosyncInput) Connect(ctx context.Context) error { AwsS3Config: awsS3Cfg, }, } - } else if g.connectionType == "gcpCloudStorage" { + } else if g.connectionType == "gcp-cloud-storage" { if g.connectionOpts != nil { gcpCfg := &mgmtv1alpha1.GcpCloudStorageStreamConfig{} if g.connectionOpts != nil { diff --git a/worker/pkg/benthos/sql/output_sql_insert.go b/worker/pkg/benthos/sql/output_sql_insert.go index 2c7a4c23ff..0bab4aa4bb 100644 --- a/worker/pkg/benthos/sql/output_sql_insert.go +++ b/worker/pkg/benthos/sql/output_sql_insert.go @@ -303,11 +303,11 @@ func (s *pooledInsertOutput) WriteBatch(ctx context.Context, batch service.Messa insertQuery = sqlserverutil.GeSqlServerDefaultValuesInsertSql(s.schema, s.table, len(rows)) } - if s.driver == sqlmanager_shared.PostgresDriver && shouldOverrideColumnDefault(s.columnDefaultProperties) { + if isSupportedPostgresDriver(s.driver) && shouldOverrideColumnDefault(s.columnDefaultProperties) { insertQuery = sqlmanager_postgres.BuildPgInsertIdentityAlwaysSql(insertQuery) } - if s.driver != sqlmanager_shared.PostgresDriver { + if !isSupportedPostgresDriver(s.driver) { insertQuery = s.buildQuery(insertQuery) } @@ -323,6 +323,10 @@ func (s *pooledInsertOutput) WriteBatch(ctx context.Context, batch service.Messa return nil } +func isSupportedPostgresDriver(driver string) bool { + return driver == sqlmanager_shared.PostgresDriver || driver == "postgres" +} + func shouldOverrideColumnDefault(columnDefaults map[string]*neosync_benthos.ColumnDefaultProperties) bool { for _, d := range columnDefaults { if !d.HasDefaultTransformer && d.NeedsOverride { @@ -345,7 +349,7 @@ func (s *pooledInsertOutput) RetryInsertRowByRow( if err != nil { return err } - if s.driver != sqlmanager_shared.PostgresDriver { + if !isSupportedPostgresDriver(s.driver) { insertQuery = s.buildQuery(insertQuery) } _, err = s.db.ExecContext(ctx, insertQuery, args...) 
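The wrapper introduced in the next file lets the CLI and worker consume the query builder through the SelectQueryMapBuilder interface defined in internal/benthos/benthos-builder/shared, keeping cgo-backed code out of the CLI build. A minimal sketch of the wiring, assuming the wrapper is the value passed as the selectQueryBuilder parameter of NewSqlSyncBuilder; the compile-time assertion is illustrative:

package example

import (
	bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared"
	querybuilder2 "github.com/nucleuscloud/neosync/worker/pkg/query-builder2"
)

// compile-time proof that the wrapper satisfies the interface the shared
// sql sync builder accepts as its query-building dependency
var _ bb_shared.SelectQueryMapBuilder = (*querybuilder2.QueryMapBuilderWrapper)(nil)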
diff --git a/worker/pkg/query-builder2/wrapper.go b/worker/pkg/query-builder2/wrapper.go new file mode 100644 index 0000000000..05804ae386 --- /dev/null +++ b/worker/pkg/query-builder2/wrapper.go @@ -0,0 +1,26 @@ +package querybuilder2 + +import ( + sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" +) + +// QueryMapBuilderWrapper implements the SelectQueryMapBuilder interface +type QueryMapBuilderWrapper struct{} + +// BuildSelectQueryMap wraps the original BuildSelectQueryMap function +func (w *QueryMapBuilderWrapper) BuildSelectQueryMap( + driver string, + tableFkConstraints map[string][]*sqlmanager_shared.ForeignConstraint, + runConfigs []*tabledependency.RunConfig, + subsetByForeignKeyConstraints bool, + groupedColumnInfo map[string]map[string]*sqlmanager_shared.ColumnInfo, +) (map[string]map[tabledependency.RunType]string, error) { + return BuildSelectQueryMap( + driver, + tableFkConstraints, + runConfigs, + subsetByForeignKeyConstraints, + groupedColumnInfo, + ) +} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/activity.go b/worker/pkg/workflows/datasync/activities/gen-benthos-configs/activity.go index a49c9d30b7..3d9941d1a3 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/activity.go +++ b/worker/pkg/workflows/datasync/activities/gen-benthos-configs/activity.go @@ -6,10 +6,8 @@ import ( "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" neosynclogger "github.com/nucleuscloud/neosync/backend/pkg/logger" - "github.com/nucleuscloud/neosync/backend/pkg/metrics" sql_manager "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager" - tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" - neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" + benthosbuilder "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder" "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" "go.temporal.io/sdk/activity" "go.temporal.io/sdk/log" @@ -19,37 +17,10 @@ type GenerateBenthosConfigsRequest struct { JobId string } type GenerateBenthosConfigsResponse struct { - BenthosConfigs []*BenthosConfigResponse + BenthosConfigs []*benthosbuilder.BenthosConfigResponse AccountId string } -type BenthosRedisConfig struct { - Key string - Table string // schema.table - Column string -} - -type BenthosConfigResponse struct { - Name string - DependsOn []*tabledependency.DependsOn - RunType tabledependency.RunType - Config *neosync_benthos.BenthosConfig - TableSchema string - TableName string - Columns []string - RedisDependsOn map[string][]string - ColumnDefaultProperties map[string]*neosync_benthos.ColumnDefaultProperties - SourceConnectionType string // used for logging - - Processors []*neosync_benthos.ProcessorConfig - BenthosDsns []*shared.BenthosDsn - RedisConfig []*BenthosRedisConfig - - primaryKeys []string - - metriclabels metrics.MetricLabels -} - type Activity struct { jobclient mgmtv1alpha1connect.JobServiceClient connclient mgmtv1alpha1connect.ConnectionServiceClient @@ -120,5 +91,5 @@ func (a *Activity) GenerateBenthosConfigs( a.metricsEnabled, ) slogger := neosynclogger.NewJsonSLogger().With(loggerKeyVals...) 
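+	// config generation is delegated to the shared builder in
+	// internal/benthos/benthos-builder rather than the worker-local implementation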
- return bbuilder.GenerateBenthosConfigs(ctx, req, &workflowMetadata{WorkflowId: info.WorkflowExecution.ID}, slogger) + return bbuilder.GenerateBenthosConfigsNew(ctx, req, &workflowMetadata{WorkflowId: info.WorkflowExecution.ID}, slogger) } diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/benthos-builder.go b/worker/pkg/workflows/datasync/activities/gen-benthos-configs/benthos-builder.go index ebc74ca154..112ba67681 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/benthos-builder.go +++ b/worker/pkg/workflows/datasync/activities/gen-benthos-configs/benthos-builder.go @@ -3,7 +3,6 @@ package genbenthosconfigs_activity import ( "context" "encoding/json" - "errors" "fmt" "log/slog" @@ -12,17 +11,15 @@ import ( "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" "github.com/nucleuscloud/neosync/backend/pkg/metrics" "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager" - sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" + sqlmanager_mssql "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/mssql" + sqlmanager_postgres "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/postgres" tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" - neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" + benthosbuilder "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder" + bb_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" + querybuilder2 "github.com/nucleuscloud/neosync/worker/pkg/query-builder2" "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" - "gopkg.in/yaml.v3" -) -const ( - jobmappingSubsetErrMsg = "job mappings are not equal to or a subset of the database schema found in the source connection" - haltOnSchemaAdditionErrMsg = "job mappings does not contain a column mapping for all " + - "columns found in the source connection for the selected schemas and tables" + "gopkg.in/yaml.v3" ) type benthosBuilder struct { @@ -71,7 +68,7 @@ type workflowMetadata struct { WorkflowId string } -func (b *benthosBuilder) GenerateBenthosConfigs( +func (b *benthosBuilder) GenerateBenthosConfigsNew( ctx context.Context, req *GenerateBenthosConfigsRequest, wfmetadata *workflowMetadata, @@ -81,284 +78,69 @@ func (b *benthosBuilder) GenerateBenthosConfigs( if err != nil { return nil, fmt.Errorf("unable to get job by id: %w", err) } - responses := []*BenthosConfigResponse{} - - // reverse of table dependency - // map of foreign key to source table + column - var primaryKeyToForeignKeysMap map[string]map[string][]*referenceKey // schema.table -> column -> ForeignKey - var colTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer // schema.table -> column -> transformer - var sqlSourceSchemaColumnInfoMap map[string]map[string]*sqlmanager_shared.ColumnInfo // schema.table -> column -> column info struct - var aiGroupedTableCols map[string][]string // map of table key to columns for AI Generated schemas - - switch job.Source.Options.Config.(type) { - case *mgmtv1alpha1.JobSourceOptions_AiGenerate: - sourceResponses, aimappings, err := b.getAiGenerateBenthosConfigResponses(ctx, job, slogger) - if err != nil { - return nil, fmt.Errorf("unable to build benthos AI Generate source config responses: %w", err) - } - aiGroupedTableCols = aimappings - responses = append(responses, sourceResponses...) 
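// The switch being deleted here dispatched on the job source type and
// hand-built configs per source. The shared builder consolidates that
// dispatch behind the manager constructed further down via
// benthosbuilder.NewWorkerBenthosConfigManager. A hypothetical sketch of
// that shape, with assumed names -- the real types live in
// internal/benthos/benthos-builder:
//
//	type sourceBuilder interface { // assumed: one builder per connection type
//		BuildSourceConfigs(ctx context.Context) ([]*BenthosConfigResponse, error)
//	}
//
//	func (m *manager) GenerateBenthosConfigs(ctx context.Context) ([]*BenthosConfigResponse, error) {
//		builder, err := m.resolveBuilder(m.sourceConnection) // e.g. sql, mongodb, dynamodb, generate
//		if err != nil {
//			return nil, fmt.Errorf("unsupported job source: %w", err)
//		}
//		return builder.BuildSourceConfigs(ctx)
//	}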
- case *mgmtv1alpha1.JobSourceOptions_Generate: - sourceResponses, err := b.getGenerateBenthosConfigResponses(ctx, job, slogger) - if err != nil { - return nil, fmt.Errorf("unable to build benthos Generate source config responses: %w", err) - } - responses = append(responses, sourceResponses...) - case *mgmtv1alpha1.JobSourceOptions_Postgres, *mgmtv1alpha1.JobSourceOptions_Mysql, *mgmtv1alpha1.JobSourceOptions_Mssql: - resp, err := b.getSqlSyncBenthosConfigResponses(ctx, job, slogger) - if err != nil { - return nil, fmt.Errorf("unable to build benthos sql sync source config responses: %w", err) - } - primaryKeyToForeignKeysMap = resp.primaryKeyToForeignKeysMap - colTransformerMap = resp.ColumnTransformerMap - sqlSourceSchemaColumnInfoMap = resp.SchemaColumnInfoMap - responses = append(responses, resp.BenthosConfigs...) - case *mgmtv1alpha1.JobSourceOptions_Mongodb: - resp, err := b.getMongoDbSyncBenthosConfigResponses(ctx, job, slogger) - if err != nil { - return nil, fmt.Errorf("unable to build benthos mongo sync source config responses: %w", err) - } - responses = append(responses, resp.BenthosConfigs...) - case *mgmtv1alpha1.JobSourceOptions_Dynamodb: - resp, err := b.getDynamoDbSyncBenthosConfigResponses(ctx, job, slogger) - if err != nil { - return nil, fmt.Errorf("unable to build benthos dynamodb sync source config responses: %w", err) - } - responses = append(responses, resp.BenthosConfigs...) - default: - return nil, fmt.Errorf("unsupported job source: %T", job.GetSource().GetOptions().GetConfig()) + sourceConnection, err := shared.GetJobSourceConnection(ctx, job.GetSource(), b.connclient) + if err != nil { + return nil, fmt.Errorf("unable to get connection by id: %w", err) } - for destIdx, destination := range job.Destinations { + destConnections := []*mgmtv1alpha1.Connection{} + for _, destination := range job.Destinations { destinationConnection, err := shared.GetConnectionById(ctx, b.connclient, destination.ConnectionId) if err != nil { return nil, fmt.Errorf("unable to get destination connection (%s) by id: %w", destination.ConnectionId, err) } - destinationConnectionType := shared.GetConnectionType(destinationConnection) - slogger = slogger.With( - "destinationConnectionType", destinationConnectionType, - ) - sqlSchemaColMap := b.GetSqlSchemaColumnMap(ctx, destination, destinationConnection, sqlSourceSchemaColumnInfoMap, slogger) - for _, resp := range responses { - dstEnvVarKey := fmt.Sprintf("DESTINATION_%d_CONNECTION_DSN", destIdx) - dsn := fmt.Sprintf("${%s}", dstEnvVarKey) - - switch connection := destinationConnection.ConnectionConfig.Config.(type) { - case *mgmtv1alpha1.ConnectionConfig_PgConfig, *mgmtv1alpha1.ConnectionConfig_MysqlConfig, *mgmtv1alpha1.ConnectionConfig_MssqlConfig: - driver, err := getSqlDriverFromConnection(destinationConnection) - if err != nil { - return nil, err - } - resp.BenthosDsns = append(resp.BenthosDsns, &shared.BenthosDsn{EnvVarKey: dstEnvVarKey, ConnectionId: destinationConnection.Id}) - if isSyncConfig(resp.Config.Input) { - // SQL sync output - var colInfoMap map[string]*sqlmanager_shared.ColumnInfo - colMap, ok := sqlSchemaColMap[neosync_benthos.BuildBenthosTable(resp.TableSchema, resp.TableName)] - if ok { - colInfoMap = colMap - } - - outputs, err := b.getSqlSyncBenthosOutput(driver, destination, resp, dsn, primaryKeyToForeignKeysMap, colTransformerMap, colInfoMap) - if err != nil { - return nil, err - } - resp.Config.Output.Broker.Outputs = append(resp.Config.Output.Broker.Outputs, outputs...) 
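// Worth noting in the loop being removed: destinations are wired to their
// DSNs through environment-variable placeholders instead of embedding
// credentials in the generated config. The pattern, copied from the deleted
// lines above:
//
//	dstEnvVarKey := fmt.Sprintf("DESTINATION_%d_CONNECTION_DSN", destIdx) // e.g. DESTINATION_0_CONNECTION_DSN
//	dsn := fmt.Sprintf("${%s}", dstEnvVarKey) // the ${...} placeholder is interpolated when the config runs
//	// BenthosDsn then records which env var belongs to which connection id,
//	// so a later step can supply the real DSN before the config executes.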
- } else if resp.Config.Input.Generate != nil { - // SQL generate output - outputs := b.getSqlGenerateOutput(driver, resp, destination, dsn) - resp.Config.Output.Broker.Outputs = append(resp.Config.Output.Broker.Outputs, outputs...) - } else if resp.Config.Input.OpenAiGenerate != nil { - // SQL AI generate output - outputs, err := b.getSqlAiGenerateOutput(driver, resp, destination, dsn, aiGroupedTableCols) - if err != nil { - return nil, err - } - resp.Config.Output.Broker.Outputs = append(resp.Config.Output.Broker.Outputs, outputs...) - } else { - return nil, errors.New("unable to build destination connection due to unsupported source connection") - } - case *mgmtv1alpha1.ConnectionConfig_AwsS3Config: - if resp.RunType == tabledependency.RunTypeUpdate { - continue - } - destinationOpts := destination.GetOptions().GetAwsS3Options() - if destinationOpts == nil { - return nil, errors.New("destination must have configured aws destination options") - } - outputs, err := b.getAwsS3SyncBenthosOutput(connection, resp, wfmetadata.WorkflowId, destinationOpts) - if err != nil { - return nil, fmt.Errorf("unable to build s3 destination connection: %w", err) - } - resp.Config.Output.Broker.Outputs = append(resp.Config.Output.Broker.Outputs, outputs...) - case *mgmtv1alpha1.ConnectionConfig_GcpCloudstorageConfig: - if resp.RunType == tabledependency.RunTypeUpdate { - continue - } - output := b.getGcpCloudStorageSyncBenthosOutput(connection, resp, wfmetadata.WorkflowId) - resp.Config.Output.Broker.Outputs = append(resp.Config.Output.Broker.Outputs, output...) - case *mgmtv1alpha1.ConnectionConfig_MongoConfig: - resp.BenthosDsns = append(resp.BenthosDsns, &shared.BenthosDsn{EnvVarKey: dstEnvVarKey, ConnectionId: destinationConnection.GetId()}) - if resp.Config.Input.PooledMongoDB != nil || resp.Config.Input.MongoDB != nil { - resp.Config.Output.PooledMongoDB = &neosync_benthos.OutputMongoDb{ - Url: dsn, - - Database: resp.TableSchema, - Collection: resp.TableName, - Operation: "update-one", - Upsert: true, - DocumentMap: ` - root = { - "$set": this - } - `, - FilterMap: ` - root._id = this._id - `, - WriteConcern: &neosync_benthos.MongoWriteConcern{ - W: "1", - }, - } - } else { - return nil, errors.New("unable to build destination connection due to unsupported source connection") - } - case *mgmtv1alpha1.ConnectionConfig_DynamodbConfig: - if resp.Config.Input.AwsDynamoDB == nil { - return nil, errors.New("unable to build destination connection due to unsupported source connection for dynamodb") - } - dynamoDestinationOpts := destination.GetOptions().GetDynamodbOptions() - if dynamoDestinationOpts == nil { - return nil, errors.New("destination must have configured dyanmodb options") - } - tableMap := map[string]string{} - for _, tm := range dynamoDestinationOpts.GetTableMappings() { - tableMap[tm.GetSourceTable()] = tm.GetDestinationTable() - } - mappedTable, ok := tableMap[resp.TableName] - if !ok { - return nil, fmt.Errorf("did not find table map for %q when building dynamodb destination config", resp.TableName) - } - resp.Config.Output.Broker.Outputs = append(resp.Config.Output.Broker.Outputs, neosync_benthos.Outputs{ - AwsDynamoDB: &neosync_benthos.OutputAwsDynamoDB{ - Table: mappedTable, - JsonMapColumns: map[string]string{ - "": ".", - }, - - Batching: &neosync_benthos.Batching{ - // https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html - // A single call to BatchWriteItem can transmit up to 16MB of data over the network, consisting of up to 25 item put or delete 
operations - // Specifying the count here may not be enough if the overall data is above 16MB. - // Benthos will fall back on error to single writes however - Period: "5s", - Count: 25, - }, - - Region: connection.DynamodbConfig.GetRegion(), - Endpoint: connection.DynamodbConfig.GetEndpoint(), - Credentials: buildBenthosS3Credentials(connection.DynamodbConfig.GetCredentials()), - }, - }) - default: - return nil, fmt.Errorf("unsupported destination connection config: %T", destinationConnection.GetConnectionConfig().GetConfig()) - } - } + destConnections = append(destConnections, destinationConnection) } - if b.metricsEnabled { - labels := metrics.MetricLabels{ - metrics.NewEqLabel(metrics.AccountIdLabel, job.AccountId), - metrics.NewEqLabel(metrics.JobIdLabel, job.Id), - metrics.NewEqLabel(metrics.TemporalWorkflowId, withEnvInterpolation(metrics.TemporalWorkflowIdEnvKey)), - metrics.NewEqLabel(metrics.TemporalRunId, withEnvInterpolation(metrics.TemporalRunIdEnvKey)), - metrics.NewEqLabel(metrics.NeosyncDateLabel, withEnvInterpolation(metrics.NeosyncDateEnvKey)), - } - for _, resp := range responses { - joinedLabels := append(labels, resp.metriclabels...) //nolint:gocritic - resp.Config.Metrics = &neosync_benthos.Metrics{ - OtelCollector: &neosync_benthos.MetricsOtelCollector{}, - Mapping: joinedLabels.ToBenthosMeta(), - } - } + benthosManagerConfig := &benthosbuilder.WorkerBenthosConfig{ + Job: job, + SourceConnection: sourceConnection, + DestinationConnections: destConnections, + RunId: wfmetadata.WorkflowId, + Logger: slogger, + Sqlmanagerclient: b.sqlmanagerclient, + Transformerclient: b.transformerclient, + Connectionclient: b.connclient, + RedisConfig: b.redisConfig, + SelectQueryBuilder: &querybuilder2.QueryMapBuilderWrapper{}, + MetricsEnabled: b.metricsEnabled, + MetricLabelKeyVals: map[string]string{ + metrics.TemporalWorkflowId: bb_shared.WithEnvInterpolation(metrics.TemporalWorkflowIdEnvKey), + metrics.TemporalRunId: bb_shared.WithEnvInterpolation(metrics.TemporalRunIdEnvKey), + }, } - - var outputConfigs []*BenthosConfigResponse - // hack to remove update configs when only syncing to s3 - if isOnlyBucketDestinations(job.Destinations) { - for _, r := range responses { - if r.RunType == tabledependency.RunTypeInsert { - outputConfigs = append(outputConfigs, r) - } - } - } else { - outputConfigs = responses + benthosManager, err := benthosbuilder.NewWorkerBenthosConfigManager(benthosManagerConfig) + if err != nil { + return nil, err + } + responses, err := benthosManager.GenerateBenthosConfigs(ctx) + if err != nil { + return nil, err } - postTableSyncRunCtx := buildPostTableSyncRunCtx(outputConfigs, job.Destinations) + // TODO move run context logic into benthos builder + postTableSyncRunCtx := buildPostTableSyncRunCtx(responses, job.Destinations) err = b.setPostTableSyncRunCtx(ctx, postTableSyncRunCtx, job.GetAccountId()) if err != nil { return nil, fmt.Errorf("unable to set all run contexts for post table sync configs: %w", err) } - outputConfigs, err = b.setRunContexts(ctx, outputConfigs, job.GetAccountId()) + outputConfigs, err := b.setRunContexts(ctx, responses, job.GetAccountId()) if err != nil { return nil, fmt.Errorf("unable to set all run contexts for benthos configs: %w", err) } - - slogger.Info(fmt.Sprintf("successfully built %d benthos configs", len(outputConfigs))) return &GenerateBenthosConfigsResponse{ + AccountId: job.AccountId, BenthosConfigs: outputConfigs, - AccountId: job.GetAccountId(), }, nil } -func withEnvInterpolation(input string) string { - return 
fmt.Sprintf("${%s}", input) -} - -// tries to get destination schema column info map -// if not uses source destination schema column info map -func (b *benthosBuilder) GetSqlSchemaColumnMap( - ctx context.Context, - destination *mgmtv1alpha1.JobDestination, - destinationConnection *mgmtv1alpha1.Connection, - sourceSchemaColumnInfoMap map[string]map[string]*sqlmanager_shared.ColumnInfo, - slogger *slog.Logger, -) map[string]map[string]*sqlmanager_shared.ColumnInfo { - schemaColMap := sourceSchemaColumnInfoMap - destOpts, err := shared.GetSqlJobDestinationOpts(destination.GetOptions()) - if err != nil || destOpts.InitSchema { - return schemaColMap - } - switch destinationConnection.ConnectionConfig.Config.(type) { - case *mgmtv1alpha1.ConnectionConfig_PgConfig, *mgmtv1alpha1.ConnectionConfig_MysqlConfig, *mgmtv1alpha1.ConnectionConfig_MssqlConfig: - destDb, err := b.sqlmanagerclient.NewPooledSqlDb(ctx, slogger, destinationConnection) - if err != nil { - destDb.Db.Close() - return schemaColMap - } - destColMap, err := destDb.Db.GetSchemaColumnMap(ctx) - if err != nil { - destDb.Db.Close() - return schemaColMap - } - if len(destColMap) != 0 { - schemaColMap = destColMap - } - destDb.Db.Close() - } - return schemaColMap -} - -func isSyncConfig(input *neosync_benthos.InputConfig) bool { - return input.SqlSelect != nil || input.PooledSqlRaw != nil -} - // this method modifies the input responses by nilling out the benthos config. it returns the same slice for convenience func (b *benthosBuilder) setRunContexts( ctx context.Context, - responses []*BenthosConfigResponse, + responses []*benthosbuilder.BenthosConfigResponse, accountId string, -) ([]*BenthosConfigResponse, error) { +) ([]*benthosbuilder.BenthosConfigResponse, error) { rcstream := b.jobclient.SetRunContexts(ctx) for _, config := range responses { @@ -387,33 +169,6 @@ func (b *benthosBuilder) setRunContexts( return responses, nil } -func buildPostTableSyncRunCtx(benthosConfigs []*BenthosConfigResponse, destinations []*mgmtv1alpha1.JobDestination) map[string]*shared.PostTableSyncConfig { - postTableSyncRunCtx := map[string]*shared.PostTableSyncConfig{} // benthos_config_name -> config - for _, bc := range benthosConfigs { - destConfigs := map[string]*shared.PostTableSyncDestConfig{} - for _, destination := range destinations { - var stmts []string - switch destination.GetOptions().GetConfig().(type) { - case *mgmtv1alpha1.JobDestinationOptions_PostgresOptions: - stmts = buildPgPostTableSyncStatement(bc) - case *mgmtv1alpha1.JobDestinationOptions_MssqlOptions: - stmts = buildMssqlPostTableSyncStatement(bc) - } - if len(stmts) != 0 { - destConfigs[destination.GetConnectionId()] = &shared.PostTableSyncDestConfig{ - Statements: stmts, - } - } - } - if len(destConfigs) != 0 { - postTableSyncRunCtx[bc.Name] = &shared.PostTableSyncConfig{ - DestinationConfigs: destConfigs, - } - } - } - return postTableSyncRunCtx -} - func (b *benthosBuilder) setPostTableSyncRunCtx( ctx context.Context, postSyncConfigs map[string]*shared.PostTableSyncConfig, @@ -446,15 +201,6 @@ func (b *benthosBuilder) setPostTableSyncRunCtx( return nil } -func isOnlyBucketDestinations(destinations []*mgmtv1alpha1.JobDestination) bool { - for _, dest := range destinations { - if dest.GetOptions().GetAwsS3Options() == nil && dest.GetOptions().GetGcpCloudstorageOptions() == nil { - return false - } - } - return true -} - func (b *benthosBuilder) getJobById( ctx context.Context, jobId string, @@ -469,97 +215,61 @@ func (b *benthosBuilder) getJobById( return getjobResp.Msg.Job, 
nil } -func groupGenerateSourceOptionsByTable( - schemaOptions []*mgmtv1alpha1.GenerateSourceSchemaOption, -) map[string]*generateSourceTableOptions { - groupedMappings := map[string]*generateSourceTableOptions{} - - for idx := range schemaOptions { - schemaOpt := schemaOptions[idx] - for tidx := range schemaOpt.Tables { - tableOpt := schemaOpt.Tables[tidx] - key := neosync_benthos.BuildBenthosTable(schemaOpt.Schema, tableOpt.Table) - groupedMappings[key] = &generateSourceTableOptions{ - Count: int(tableOpt.RowCount), // todo: probably need to update rowcount int64 to int32 +func buildPostTableSyncRunCtx(benthosConfigs []*benthosbuilder.BenthosConfigResponse, destinations []*mgmtv1alpha1.JobDestination) map[string]*shared.PostTableSyncConfig { + postTableSyncRunCtx := map[string]*shared.PostTableSyncConfig{} // benthos_config_name -> config + for _, bc := range benthosConfigs { + destConfigs := map[string]*shared.PostTableSyncDestConfig{} + for _, destination := range destinations { + var stmts []string + switch destination.GetOptions().GetConfig().(type) { + case *mgmtv1alpha1.JobDestinationOptions_PostgresOptions: + stmts = buildPgPostTableSyncStatement(bc) + case *mgmtv1alpha1.JobDestinationOptions_MssqlOptions: + stmts = buildMssqlPostTableSyncStatement(bc) + } + if len(stmts) != 0 { + destConfigs[destination.GetConnectionId()] = &shared.PostTableSyncDestConfig{ + Statements: stmts, + } } } - } - - return groupedMappings -} - -func getSqlDriverFromConnection(conn *mgmtv1alpha1.Connection) (string, error) { - switch conn.ConnectionConfig.Config.(type) { - case *mgmtv1alpha1.ConnectionConfig_PgConfig: - return sqlmanager_shared.PostgresDriver, nil - case *mgmtv1alpha1.ConnectionConfig_MysqlConfig: - return sqlmanager_shared.MysqlDriver, nil - case *mgmtv1alpha1.ConnectionConfig_MssqlConfig: - return sqlmanager_shared.MssqlDriver, nil - default: - return "", fmt.Errorf("unsupported sql connection config") - } -} - -func groupSqlJobSourceOptionsByTable( - sqlSourceOpts *sqlJobSourceOpts, -) map[string]*sqlSourceTableOptions { - groupedMappings := map[string]*sqlSourceTableOptions{} - for _, schemaOpt := range sqlSourceOpts.SchemaOpt { - for tidx := range schemaOpt.Tables { - tableOpt := schemaOpt.Tables[tidx] - key := neosync_benthos.BuildBenthosTable(schemaOpt.Schema, tableOpt.Table) - groupedMappings[key] = &sqlSourceTableOptions{ - WhereClause: tableOpt.WhereClause, + if len(destConfigs) != 0 { + postTableSyncRunCtx[bc.Name] = &shared.PostTableSyncConfig{ + DestinationConfigs: destConfigs, } } } - return groupedMappings -} - -type tableMapping struct { - Schema string - Table string - Mappings []*mgmtv1alpha1.JobMapping + return postTableSyncRunCtx } -func groupMappingsByTable( - mappings []*mgmtv1alpha1.JobMapping, -) []*tableMapping { - groupedMappings := map[string][]*mgmtv1alpha1.JobMapping{} - - for _, mapping := range mappings { - key := neosync_benthos.BuildBenthosTable(mapping.Schema, mapping.Table) - groupedMappings[key] = append(groupedMappings[key], mapping) +func buildPgPostTableSyncStatement(bc *benthosbuilder.BenthosConfigResponse) []string { + statements := []string{} + if bc.RunType == tabledependency.RunTypeUpdate { + return statements } - - output := make([]*tableMapping, 0, len(groupedMappings)) - for key, mappings := range groupedMappings { - schema, table := sqlmanager_shared.SplitTableKey(key) - output = append(output, &tableMapping{ - Schema: schema, - Table: table, - Mappings: mappings, - }) + colDefaultProps := bc.ColumnDefaultProperties + for colName, p := 
range colDefaultProps { + if p.NeedsReset && !p.HasDefaultTransformer { + // resets sequences and identities + resetSql := sqlmanager_postgres.BuildPgIdentityColumnResetCurrentSql(bc.TableSchema, bc.TableName, colName) + statements = append(statements, resetSql) + } } - return output + return statements } -func getTableMappingsMap(groupedMappings []*tableMapping) map[string]*tableMapping { - groupedTableMapping := map[string]*tableMapping{} - for _, tm := range groupedMappings { - groupedTableMapping[neosync_benthos.BuildBenthosTable(tm.Schema, tm.Table)] = tm +func buildMssqlPostTableSyncStatement(bc *benthosbuilder.BenthosConfigResponse) []string { + statements := []string{} + if bc.RunType == tabledependency.RunTypeUpdate { + return statements } - return groupedTableMapping -} - -func getColumnTransformerMap(tableMappingMap map[string]*tableMapping) map[string]map[string]*mgmtv1alpha1.JobMappingTransformer { - colTransformerMap := map[string]map[string]*mgmtv1alpha1.JobMappingTransformer{} // schema.table -> column -> transformer - for table, mapping := range tableMappingMap { - colTransformerMap[table] = map[string]*mgmtv1alpha1.JobMappingTransformer{} - for _, m := range mapping.Mappings { - colTransformerMap[table][m.Column] = m.Transformer + colDefaultProps := bc.ColumnDefaultProperties + for _, p := range colDefaultProps { + if p.NeedsOverride { + // reset identity + resetSql := sqlmanager_mssql.BuildMssqlIdentityColumnResetCurrent(bc.TableSchema, bc.TableName) + statements = append(statements, resetSql) } } - return colTransformerMap + return statements } diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/benthos-builder_test.go b/worker/pkg/workflows/datasync/activities/gen-benthos-configs/benthos-builder_test.go index 50d04c2cd4..6e03821ae3 100644 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/benthos-builder_test.go +++ b/worker/pkg/workflows/datasync/activities/gen-benthos-configs/benthos-builder_test.go @@ -1,1544 +1,19 @@ package genbenthosconfigs_activity import ( - "context" - "fmt" - "log/slog" - "os" "testing" - "connectrpc.com/connect" mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" - "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" sqlmanager_mssql "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/mssql" sqlmanager_postgres "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/postgres" - sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" - "github.com/nucleuscloud/neosync/internal/gotypeutil" + benthosbuilder "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder" "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - "github.com/warpstreamlabs/bento/public/bloblang" neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" ) -const ( - mockJobId = "b1767636-3992-4cb4-9bf2-4bb9bddbf43c" - mockWorkflowId = "b1767636-3992-4cb4-9bf2-4bb9bddbf43c-workflowid" - mockRunId = "26444272-0bb0-4325-ae60-17dcd9744785" -) - -var dsn = "dsn" -var driver = sqlmanager_shared.PostgresDriver - -func Test_ProcessorConfigEmpty(t *testing.T) { - mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) - - tableMappings := map[string]*tableMapping{ - "public.users": {Schema: "public", - Table: "users", - 
Mappings: []*mgmtv1alpha1.JobMapping{ - { - Schema: "public", - Table: "users", - Column: "id", - Transformer: &mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, - }, - }, - }, - { - Schema: "public", - Table: "users", - Column: "name", - Transformer: &mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, - }, - }, - }, - }, - }} - - groupedSchemas := map[string]map[string]*sqlmanager_shared.ColumnInfo{ - "public.users": { - "id": &sqlmanager_shared.ColumnInfo{ - OrdinalPosition: 1, - ColumnDefault: "324", - IsNullable: false, - DataType: "", - CharacterMaximumLength: nil, - NumericPrecision: nil, - NumericScale: nil, - }, - "name": &sqlmanager_shared.ColumnInfo{ - OrdinalPosition: 1, - ColumnDefault: "324", - IsNullable: false, - DataType: "", - CharacterMaximumLength: nil, - NumericPrecision: nil, - NumericScale: nil, - }, - }, - } - groupedTransformers := map[string]map[string]*mgmtv1alpha1.JobMappingTransformer{ - "public.users": { - "id": &mgmtv1alpha1.JobMappingTransformer{}, - "name": &mgmtv1alpha1.JobMappingTransformer{}, - }, - } - queryMap := map[string]map[tabledependency.RunType]string{ - "public.users": {tabledependency.RunTypeInsert: ""}, - } - runconfigs := []*tabledependency.RunConfig{ - tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{"id"}, nil, []string{"id", "name"}, []string{"id", "name"}, []*tabledependency.DependsOn{}, false), - } - logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) - - res, err := buildBenthosSqlSourceConfigResponses( - logger, - context.Background(), - mockTransformerClient, - tableMappings, - runconfigs, - dsn, - driver, - queryMap, - groupedSchemas, - map[string][]*sqlmanager_shared.ForeignConstraint{}, - groupedTransformers, - mockJobId, - mockRunId, - nil, - nil, - "postgres", - ) - require.Nil(t, err) - require.Empty(t, res[0].Config.StreamConfig.Pipeline.Processors) -} - -func Test_ProcessorConfigEmptyJavascript(t *testing.T) { - mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) - - tableMappings := map[string]*tableMapping{ - "public.users": {Schema: "public", - Table: "users", - Mappings: []*mgmtv1alpha1.JobMapping{ - { - Schema: "public", - Table: "users", - Column: "id", - Transformer: &mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{ - PassthroughConfig: &mgmtv1alpha1.Passthrough{}, - }, - }, - }, - }, - { - Schema: "public", - Table: "users", - Column: "name", - Transformer: &mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformJavascriptConfig{ - TransformJavascriptConfig: &mgmtv1alpha1.TransformJavascript{Code: ""}, - }, - }, - }, - }, - }, - }} - - groupedSchemas := map[string]map[string]*sqlmanager_shared.ColumnInfo{ - "public.users": { - "id": &sqlmanager_shared.ColumnInfo{ - OrdinalPosition: 1, - ColumnDefault: "324", - IsNullable: false, - DataType: "", - CharacterMaximumLength: nil, - NumericPrecision: nil, - NumericScale: nil, - }, - "name": &sqlmanager_shared.ColumnInfo{ - OrdinalPosition: 1, - ColumnDefault: "324", - IsNullable: false, - DataType: "", - CharacterMaximumLength: nil, - NumericPrecision: nil, - NumericScale: nil, - }, - }, - } - - groupedTransformers := 
map[string]map[string]*mgmtv1alpha1.JobMappingTransformer{ - "public.users": { - "id": &mgmtv1alpha1.JobMappingTransformer{}, - "name": &mgmtv1alpha1.JobMappingTransformer{}, - }, - } - - runconfigs := []*tabledependency.RunConfig{ - tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{"id"}, nil, []string{"id", "name"}, []string{"id", "name"}, []*tabledependency.DependsOn{}, false), - } - - queryMap := map[string]map[tabledependency.RunType]string{ - "public.users": {tabledependency.RunTypeInsert: ""}, - } - logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) - - res, err := buildBenthosSqlSourceConfigResponses( - logger, - context.Background(), - mockTransformerClient, - tableMappings, - runconfigs, - dsn, - driver, - queryMap, - groupedSchemas, - map[string][]*sqlmanager_shared.ForeignConstraint{}, - groupedTransformers, - mockJobId, - mockRunId, - nil, - nil, - "postgres", - ) - require.NoError(t, err) - require.Empty(t, res[0].Config.StreamConfig.Pipeline.Processors) -} - -func TestAreMappingsSubsetOfSchemas(t *testing.T) { - ok := areMappingsSubsetOfSchemas( - map[string]map[string]*sqlmanager_shared.ColumnInfo{ - "public.users": { - "id": &sqlmanager_shared.ColumnInfo{}, - "created_by": &sqlmanager_shared.ColumnInfo{}, - "updated_by": &sqlmanager_shared.ColumnInfo{}, - }, - "neosync_api.accounts": { - "id": &sqlmanager_shared.ColumnInfo{}, - }, - }, - []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id"}, - {Schema: "public", Table: "users", Column: "created_by"}, - }, - ) - require.True(t, ok, "job mappings are a subset of the present database schemas") - - ok = areMappingsSubsetOfSchemas( - map[string]map[string]*sqlmanager_shared.ColumnInfo{ - "public.users": { - "id": &sqlmanager_shared.ColumnInfo{}, - }, - }, - []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id2"}, - }, - ) - require.False(t, ok, "job mappings contain mapping that is not in the source schema") - - ok = areMappingsSubsetOfSchemas( - map[string]map[string]*sqlmanager_shared.ColumnInfo{ - "public.users": { - "id": &sqlmanager_shared.ColumnInfo{}, - }, - }, - []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id"}, - {Schema: "public", Table: "users", Column: "created_by"}, - }, - ) - require.False(t, ok, "job mappings contain more mappings than are present in the source schema") -} - -func TestShouldHaltOnSchemaAddition(t *testing.T) { - ok := shouldHaltOnSchemaAddition( - map[string]map[string]*sqlmanager_shared.ColumnInfo{ - "public.users": { - "id": &sqlmanager_shared.ColumnInfo{}, - "created_by": &sqlmanager_shared.ColumnInfo{}, - }, - }, - []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id"}, - {Schema: "public", Table: "users", Column: "created_by"}, - }, - ) - require.False(t, ok, "job mappings are valid set of database schemas") - - ok = shouldHaltOnSchemaAddition( - map[string]map[string]*sqlmanager_shared.ColumnInfo{ - "public.users": { - "id": &sqlmanager_shared.ColumnInfo{}, - "created_by": &sqlmanager_shared.ColumnInfo{}, - }, - "neosync_api.accounts": { - "id": &sqlmanager_shared.ColumnInfo{}, - }, - }, - []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id"}, - {Schema: "public", Table: "users", Column: "created_by"}, - }, - ) - require.True(t, ok, "job mappings are missing database schema mappings") - - ok = shouldHaltOnSchemaAddition( - map[string]map[string]*sqlmanager_shared.ColumnInfo{ - "public.users": { - "id": 
&sqlmanager_shared.ColumnInfo{}, - "created_by": &sqlmanager_shared.ColumnInfo{}, - }, - }, - []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id"}, - }, - ) - require.True(t, ok, "job mappings are missing table column") - - ok = shouldHaltOnSchemaAddition( - map[string]map[string]*sqlmanager_shared.ColumnInfo{ - "public.users": { - "id": &sqlmanager_shared.ColumnInfo{}, - "created_by": &sqlmanager_shared.ColumnInfo{}, - }, - }, - []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id"}, - {Schema: "public", Table: "users", Column: "updated_by"}, - }, - ) - require.True(t, ok, "job mappings have same column count, but missing specific column") -} - -func Test_buildProcessorConfigsMutation(t *testing.T) { - mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) - - ctx := context.Background() - - runconfig := tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{}, nil, []string{}, []string{}, []*tabledependency.DependsOn{}, false) - output, err := buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{}, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) - require.Nil(t, err) - require.Empty(t, output) - - output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{}, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) - require.Nil(t, err) - require.Empty(t, output) - - runconfig = tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{}, nil, []string{}, []string{"id"}, []*tabledependency.DependsOn{}, false) - output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id"}, - }, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) - require.Nil(t, err) - require.Empty(t, output) - - output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id", Transformer: &mgmtv1alpha1.JobMappingTransformer{}}, - }, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) - require.Nil(t, err) - require.Empty(t, output) - - output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, - }}}, - }, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) - require.Nil(t, err) - require.Empty(t, output) - - runconfig = tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{}, nil, []string{}, []string{"id", "name"}, []*tabledependency.DependsOn{}, false) - output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{ - Nullconfig: &mgmtv1alpha1.Null{}, - }, - 
}}}, - {Schema: "public", Table: "users", Column: "name", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{ - Nullconfig: &mgmtv1alpha1.Null{}, - }, - }}}, - }, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) - - require.Nil(t, err) - - require.Equal(t, *output[0].Mutation, "root.\"id\" = null\nroot.\"name\" = null") - - jsT := mgmtv1alpha1.SystemTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{ - TransformEmailConfig: &mgmtv1alpha1.TransformEmail{ - PreserveDomain: gotypeutil.ToPtr(true), - PreserveLength: gotypeutil.ToPtr(false), - ExcludedDomains: []string{}, - }, - }, - }, - } - - emailLength := 40 - - groupedSchemas := map[string]*sqlmanager_shared.ColumnInfo{ - - "email": { - OrdinalPosition: 2, - ColumnDefault: "", - IsNullable: true, - DataType: "timestamptz", - CharacterMaximumLength: &emailLength, - NumericPrecision: nil, - NumericScale: nil, - }, - } - - runconfig = tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{"id"}, nil, []string{"email"}, []string{"email"}, []*tabledependency.DependsOn{}, false) - output, err = buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "email", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT.Config}}}, groupedSchemas, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, []string{}) - - require.Nil(t, err) - require.Equal(t, `root."email" = transform_email(value:this."email",preserve_length:false,preserve_domain:true,excluded_domains:[],max_length:40,email_type:"uuidv4",invalid_email_action:"reject")`, *output[0].Mutation) -} - -func Test_ShouldProcessColumnTrue(t *testing.T) { - val := &mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateEmailConfig{ - GenerateEmailConfig: &mgmtv1alpha1.GenerateEmail{}, - }, - }, - } - - res := shouldProcessColumn(val) - require.Equal(t, true, res) -} - -func Test_ShouldProcessColumnFalse(t *testing.T) { - val := &mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{ - PassthroughConfig: &mgmtv1alpha1.Passthrough{}, - }, - }, - } - - res := shouldProcessColumn(val) - require.Equal(t, false, res) -} - -func Test_buildProcessorConfigsJavascriptEmpty(t *testing.T) { - mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) - ctx := context.Background() - - jsT := mgmtv1alpha1.SystemTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformJavascriptConfig{ - TransformJavascriptConfig: &mgmtv1alpha1.TransformJavascript{ - Code: ``, - }, - }, - }, - } - - runconfig := tabledependency.NewRunConfig("public.users", tabledependency.RunTypeInsert, []string{"id"}, nil, []string{"id"}, []string{"id"}, []*tabledependency.DependsOn{}, false) - resp, err := buildProcessorConfigs(ctx, mockTransformerClient, []*mgmtv1alpha1.JobMapping{ - {Schema: "public", Table: "users", Column: "id", Transformer: &mgmtv1alpha1.JobMappingTransformer{Config: jsT.Config}}}, map[string]*sqlmanager_shared.ColumnInfo{}, map[string][]*referenceKey{}, []string{}, mockJobId, mockRunId, nil, runconfig, nil, - []string{}) 
- - require.NoError(t, err) - require.Empty(t, resp) -} - -func Test_convertUserDefinedFunctionConfig(t *testing.T) { - mockTransformerClient := mgmtv1alpha1connect.NewMockTransformersServiceClient(t) - - ctx := context.Background() - - mockTransformerClient.On( - "GetUserDefinedTransformerById", - mock.Anything, - connect.NewRequest(&mgmtv1alpha1.GetUserDefinedTransformerByIdRequest{ - TransformerId: "123", - }), - ).Return(connect.NewResponse(&mgmtv1alpha1.GetUserDefinedTransformerByIdResponse{ - Transformer: &mgmtv1alpha1.UserDefinedTransformer{ - Id: "123", - Name: "stage", - Description: "description", - DataType: mgmtv1alpha1.TransformerDataType_TRANSFORMER_DATA_TYPE_STRING, - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_EMAIL, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{ - TransformEmailConfig: &mgmtv1alpha1.TransformEmail{ - PreserveDomain: gotypeutil.ToPtr(true), - PreserveLength: gotypeutil.ToPtr(false), - ExcludedDomains: []string{}, - }, - }, - }, - }, - }), nil) - - jmt := &mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_UserDefinedTransformerConfig{ - UserDefinedTransformerConfig: &mgmtv1alpha1.UserDefinedTransformerConfig{ - Id: "123", - }, - }, - }, - } - - expected := &mgmtv1alpha1.JobMappingTransformer{ - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_EMAIL, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{ - TransformEmailConfig: &mgmtv1alpha1.TransformEmail{ - PreserveDomain: gotypeutil.ToPtr(true), - PreserveLength: gotypeutil.ToPtr(false), - ExcludedDomains: []string{}, - }, - }, - }, - } - - resp, err := convertUserDefinedFunctionConfig(ctx, mockTransformerClient, jmt) - require.NoError(t, err) - require.Equal(t, resp, expected) -} - -func Test_buildPlainInsertArgs(t *testing.T) { - require.Empty(t, buildPlainInsertArgs(nil)) - require.Empty(t, buildPlainInsertArgs([]string{})) - require.Equal(t, buildPlainInsertArgs([]string{"foo", "bar", "baz"}), `root = [this."foo", this."bar", this."baz"]`) -} - -func Test_buildPlainColumns(t *testing.T) { - require.Empty(t, buildPlainColumns(nil)) - require.Empty(t, buildPlainColumns([]*mgmtv1alpha1.JobMapping{})) - require.Equal( - t, - buildPlainColumns([]*mgmtv1alpha1.JobMapping{ - {Column: "foo"}, - {Column: "bar"}, - {Column: "baz"}, - }), - []string{"foo", "bar", "baz"}, - ) -} - -func Test_buildBenthosS3Credentials(t *testing.T) { - require.Nil(t, buildBenthosS3Credentials(nil)) - - require.Equal( - t, - buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{}), - &neosync_benthos.AwsCredentials{}, - ) - require.Equal( - t, - buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{Profile: shared.Ptr("foo")}), - &neosync_benthos.AwsCredentials{Profile: "foo"}, - ) - require.Equal( - t, - buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{AccessKeyId: shared.Ptr("foo")}), - &neosync_benthos.AwsCredentials{Id: "foo"}, - ) - require.Equal( - t, - buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{SecretAccessKey: shared.Ptr("foo")}), - &neosync_benthos.AwsCredentials{Secret: "foo"}, - ) - require.Equal( - t, - buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{SessionToken: shared.Ptr("foo")}), - &neosync_benthos.AwsCredentials{Token: "foo"}, - ) - require.Equal( - t, - buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{FromEc2Role: shared.Ptr(true)}), - 
&neosync_benthos.AwsCredentials{FromEc2Role: true}, - ) - require.Equal( - t, - buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{RoleArn: shared.Ptr("foo")}), - &neosync_benthos.AwsCredentials{Role: "foo"}, - ) - require.Equal( - t, - buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{RoleExternalId: shared.Ptr("foo")}), - &neosync_benthos.AwsCredentials{RoleExternalId: "foo"}, - ) - require.Equal( - t, - buildBenthosS3Credentials(&mgmtv1alpha1.AwsS3Credentials{ - Profile: shared.Ptr("profile"), - AccessKeyId: shared.Ptr("access-key"), - SecretAccessKey: shared.Ptr("secret"), - SessionToken: shared.Ptr("session"), - FromEc2Role: shared.Ptr(false), - RoleArn: shared.Ptr("role"), - RoleExternalId: shared.Ptr("foo"), - }), - &neosync_benthos.AwsCredentials{ - Profile: "profile", - Id: "access-key", - Secret: "secret", - Token: "session", - FromEc2Role: false, - Role: "role", - RoleExternalId: "foo", - }, - ) -} - -func Test_computeMutationFunction_null(t *testing.T) { - val, err := computeMutationFunction( - &mgmtv1alpha1.JobMapping{ - Transformer: &mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{}}, - }, - }, &sqlmanager_shared.ColumnInfo{}, false) - require.NoError(t, err) - require.Equal(t, val, "null") -} - -func Test_computeMutationFunction_Validate_Bloblang_Output(t *testing.T) { - uuidEmailType := mgmtv1alpha1.GenerateEmailType_GENERATE_EMAIL_TYPE_UUID_V4 - transformers := []*mgmtv1alpha1.SystemTransformer{ - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_EMAIL, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateEmailConfig{ - GenerateEmailConfig: &mgmtv1alpha1.GenerateEmail{ - EmailType: &uuidEmailType, - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_EMAIL, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{ - TransformEmailConfig: &mgmtv1alpha1.TransformEmail{ - PreserveDomain: gotypeutil.ToPtr(false), - PreserveLength: gotypeutil.ToPtr(false), - ExcludedDomains: []string{"gmail", "yahoo"}, - EmailType: &uuidEmailType, - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_BOOL, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateBoolConfig{ - GenerateBoolConfig: &mgmtv1alpha1.GenerateBool{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_CARD_NUMBER, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateCardNumberConfig{ - GenerateCardNumberConfig: &mgmtv1alpha1.GenerateCardNumber{ - ValidLuhn: gotypeutil.ToPtr(true), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_CITY, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateCityConfig{ - GenerateCityConfig: &mgmtv1alpha1.GenerateCity{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_E164_PHONE_NUMBER, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateE164PhoneNumberConfig{ - GenerateE164PhoneNumberConfig: &mgmtv1alpha1.GenerateE164PhoneNumber{ - Min: gotypeutil.ToPtr(int64(9)), - Max: gotypeutil.ToPtr(int64(15)), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_FIRST_NAME, - Config: 
&mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateFirstNameConfig{ - GenerateFirstNameConfig: &mgmtv1alpha1.GenerateFirstName{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_FLOAT64, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateFloat64Config{ - GenerateFloat64Config: &mgmtv1alpha1.GenerateFloat64{ - RandomizeSign: gotypeutil.ToPtr(true), - Min: gotypeutil.ToPtr(1.00), - Max: gotypeutil.ToPtr(100.00), - Precision: gotypeutil.ToPtr(int64(6)), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_FULL_ADDRESS, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateFullAddressConfig{ - GenerateFullAddressConfig: &mgmtv1alpha1.GenerateFullAddress{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_FULL_NAME, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateFullNameConfig{ - GenerateFullNameConfig: &mgmtv1alpha1.GenerateFullName{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_GENDER, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateGenderConfig{ - GenerateGenderConfig: &mgmtv1alpha1.GenerateGender{ - Abbreviate: gotypeutil.ToPtr(false), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_INT64_PHONE_NUMBER, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateInt64PhoneNumberConfig{ - GenerateInt64PhoneNumberConfig: &mgmtv1alpha1.GenerateInt64PhoneNumber{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_INT64, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateInt64Config{ - GenerateInt64Config: &mgmtv1alpha1.GenerateInt64{ - RandomizeSign: gotypeutil.ToPtr(true), - Min: gotypeutil.ToPtr(int64(1)), - Max: gotypeutil.ToPtr(int64(40)), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_LAST_NAME, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateLastNameConfig{ - GenerateLastNameConfig: &mgmtv1alpha1.GenerateLastName{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_SHA256HASH, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateSha256HashConfig{ - GenerateSha256HashConfig: &mgmtv1alpha1.GenerateSha256Hash{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_SSN, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateSsnConfig{ - GenerateSsnConfig: &mgmtv1alpha1.GenerateSSN{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_STATE, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateStateConfig{ - GenerateStateConfig: &mgmtv1alpha1.GenerateState{ - GenerateFullName: gotypeutil.ToPtr(false), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_STREET_ADDRESS, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateStreetAddressConfig{ - GenerateStreetAddressConfig: &mgmtv1alpha1.GenerateStreetAddress{}, - }, - }, - }, - { - Source: 
mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_STRING_PHONE_NUMBER, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateStringPhoneNumberConfig{ - GenerateStringPhoneNumberConfig: &mgmtv1alpha1.GenerateStringPhoneNumber{ - Min: gotypeutil.ToPtr(int64(9)), - Max: gotypeutil.ToPtr(int64(14)), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_RANDOM_STRING, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateStringConfig{ - GenerateStringConfig: &mgmtv1alpha1.GenerateString{ - Min: gotypeutil.ToPtr(int64(2)), - Max: gotypeutil.ToPtr(int64(7)), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_UNIXTIMESTAMP, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateUnixtimestampConfig{ - GenerateUnixtimestampConfig: &mgmtv1alpha1.GenerateUnixTimestamp{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_USERNAME, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateUsernameConfig{ - GenerateUsernameConfig: &mgmtv1alpha1.GenerateUsername{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_UTCTIMESTAMP, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateUtctimestampConfig{ - GenerateUtctimestampConfig: &mgmtv1alpha1.GenerateUtcTimestamp{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_UUID, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateUuidConfig{ - GenerateUuidConfig: &mgmtv1alpha1.GenerateUuid{ - IncludeHyphens: gotypeutil.ToPtr(true), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_ZIPCODE, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateZipcodeConfig{ - GenerateZipcodeConfig: &mgmtv1alpha1.GenerateZipcode{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_E164_PHONE_NUMBER, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformE164PhoneNumberConfig{ - TransformE164PhoneNumberConfig: &mgmtv1alpha1.TransformE164PhoneNumber{ - PreserveLength: gotypeutil.ToPtr(false), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_FIRST_NAME, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformFirstNameConfig{ - TransformFirstNameConfig: &mgmtv1alpha1.TransformFirstName{ - PreserveLength: gotypeutil.ToPtr(false), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_FLOAT64, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformFloat64Config{ - TransformFloat64Config: &mgmtv1alpha1.TransformFloat64{ - RandomizationRangeMin: gotypeutil.ToPtr(20.00), - RandomizationRangeMax: gotypeutil.ToPtr(50.00), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_FULL_NAME, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformFullNameConfig{ - TransformFullNameConfig: &mgmtv1alpha1.TransformFullName{ - PreserveLength: gotypeutil.ToPtr(false), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_INT64_PHONE_NUMBER, - 
Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformInt64PhoneNumberConfig{ - TransformInt64PhoneNumberConfig: &mgmtv1alpha1.TransformInt64PhoneNumber{ - PreserveLength: gotypeutil.ToPtr(false), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_INT64, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformInt64Config{ - TransformInt64Config: &mgmtv1alpha1.TransformInt64{ - RandomizationRangeMin: gotypeutil.ToPtr(int64(20)), - RandomizationRangeMax: gotypeutil.ToPtr(int64(50)), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_LAST_NAME, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformLastNameConfig{ - TransformLastNameConfig: &mgmtv1alpha1.TransformLastName{ - PreserveLength: gotypeutil.ToPtr(false), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_PHONE_NUMBER, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformPhoneNumberConfig{ - TransformPhoneNumberConfig: &mgmtv1alpha1.TransformPhoneNumber{ - PreserveLength: gotypeutil.ToPtr(false), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_STRING, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformStringConfig{ - TransformStringConfig: &mgmtv1alpha1.TransformString{ - PreserveLength: gotypeutil.ToPtr(false), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_CATEGORICAL, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateCategoricalConfig{ - GenerateCategoricalConfig: &mgmtv1alpha1.GenerateCategorical{ - Categories: gotypeutil.ToPtr("value1,value2"), - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_TRANSFORM_CHARACTER_SCRAMBLE, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_TransformCharacterScrambleConfig{ - TransformCharacterScrambleConfig: &mgmtv1alpha1.TransformCharacterScramble{ - UserProvidedRegex: nil, - }, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_DEFAULT, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig{ - GenerateDefaultConfig: &mgmtv1alpha1.GenerateDefault{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_NULL, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{ - Nullconfig: &mgmtv1alpha1.Null{}, - }, - }, - }, - { - Source: mgmtv1alpha1.TransformerSource_TRANSFORMER_SOURCE_GENERATE_COUNTRY, - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateCountryConfig{ - GenerateCountryConfig: &mgmtv1alpha1.GenerateCountry{ - GenerateFullName: gotypeutil.ToPtr(false), - }, - }, - }, - }, - } - - emailColInfo := &sqlmanager_shared.ColumnInfo{ - OrdinalPosition: 2, - ColumnDefault: "", - IsNullable: true, - DataType: "timestamptz", - CharacterMaximumLength: shared.Ptr(40), - NumericPrecision: nil, - NumericScale: nil, - } - - for _, transformer := range transformers { - t.Run(fmt.Sprintf("%s_%s_lint", t.Name(), transformer.Source), func(t *testing.T) { - val, err := computeMutationFunction( - &mgmtv1alpha1.JobMapping{ - Column: "email", - Transformer: 
&mgmtv1alpha1.JobMappingTransformer{ - Config: transformer.Config, - }, - }, emailColInfo, false) - require.NoError(t, err) - ex, err := bloblang.Parse(val) - require.NoError(t, err, fmt.Sprintf("transformer lint failed, check that the transformer string is being constructed correctly. Failing source: %s", transformer.Source)) - _, err = ex.Query(nil) - require.NoError(t, err) - }) - } -} - -func Test_computeMutationFunction_Validate_Bloblang_Output_EmptyConfigs(t *testing.T) { - transformers := []*mgmtv1alpha1.SystemTransformer{ - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateEmailConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformEmailConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateBoolConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateCardNumberConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateCityConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateE164PhoneNumberConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFirstNameConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFloat64Config{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFullAddressConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFullNameConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateGenderConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateInt64PhoneNumberConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateInt64Config{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateLastNameConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateSha256HashConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateSsnConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateStateConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateStreetAddressConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateStringPhoneNumberConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateStringConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateUnixtimestampConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateUsernameConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateUtctimestampConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateUuidConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateZipcodeConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: 
&mgmtv1alpha1.TransformerConfig_TransformE164PhoneNumberConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFirstNameConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateFloat64Config{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformFullNameConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformInt64PhoneNumberConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformInt64Config{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformLastNameConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformPhoneNumberConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformStringConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateCategoricalConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_TransformCharacterScrambleConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{}}, - }, - { - Config: &mgmtv1alpha1.TransformerConfig{Config: &mgmtv1alpha1.TransformerConfig_GenerateCountryConfig{}}, - }, - } - - emailColInfo := &sqlmanager_shared.ColumnInfo{ - OrdinalPosition: 2, - ColumnDefault: "", - IsNullable: true, - DataType: "timestamptz", - CharacterMaximumLength: shared.Ptr(40), - NumericPrecision: nil, - NumericScale: nil, - } - - for _, transformer := range transformers { - t.Run(fmt.Sprintf("%s_%s_lint", t.Name(), transformer.Source), func(t *testing.T) { - val, err := computeMutationFunction( - &mgmtv1alpha1.JobMapping{ - Column: "email", - Transformer: &mgmtv1alpha1.JobMappingTransformer{ - Config: transformer.Config, - }, - }, emailColInfo, false) - require.NoError(t, err) - ex, err := bloblang.Parse(val) - require.NoError(t, err, fmt.Sprintf("transformer lint failed, check that the transformer string is being constructed correctly. 
Failing source: %s", transformer.Source)) - _, err = ex.Query(nil) - require.NoError(t, err) - }) - } -} - -func Test_computeMutationFunction_handles_Db_Maxlen(t *testing.T) { - type testcase struct { - jm *mgmtv1alpha1.JobMapping - ci *sqlmanager_shared.ColumnInfo - expected string - } - jm := &mgmtv1alpha1.JobMapping{ - Transformer: &mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateStringConfig{ - GenerateStringConfig: &mgmtv1alpha1.GenerateString{ - Min: gotypeutil.ToPtr(int64(2)), - Max: gotypeutil.ToPtr(int64(7)), - }, - }, - }, - }, - } - testcases := []testcase{ - { - jm: jm, - ci: &sqlmanager_shared.ColumnInfo{}, - expected: "generate_string(min:2,max:7)", - }, - { - jm: jm, - ci: &sqlmanager_shared.ColumnInfo{ - CharacterMaximumLength: nil, - }, - expected: "generate_string(min:2,max:7)", - }, - { - jm: jm, - ci: &sqlmanager_shared.ColumnInfo{ - CharacterMaximumLength: shared.Ptr(-1), - }, - expected: "generate_string(min:2,max:7)", - }, - { - jm: jm, - ci: &sqlmanager_shared.ColumnInfo{ - CharacterMaximumLength: shared.Ptr(0), - }, - expected: "generate_string(min:2,max:7)", - }, - { - jm: jm, - ci: &sqlmanager_shared.ColumnInfo{ - CharacterMaximumLength: shared.Ptr(10), - }, - expected: "generate_string(min:2,max:7)", - }, - { - jm: jm, - ci: &sqlmanager_shared.ColumnInfo{ - CharacterMaximumLength: shared.Ptr(3), - }, - expected: "generate_string(min:2,max:3)", - }, - { - jm: jm, - ci: &sqlmanager_shared.ColumnInfo{ - CharacterMaximumLength: shared.Ptr(1), - }, - expected: "generate_string(min:1,max:1)", - }, - } - - for _, tc := range testcases { - t.Run(t.Name(), func(t *testing.T) { - out, err := computeMutationFunction(tc.jm, tc.ci, false) - require.NoError(t, err) - require.NotNil(t, out) - require.Equal(t, tc.expected, out, "computed bloblang string was not expected") - ex, err := bloblang.Parse(out) - require.NoError(t, err) - _, err = ex.Query(nil) - require.NoError(t, err) - }) - } -} - -func Test_buildBranchCacheConfigs_null(t *testing.T) { - cols := []*mgmtv1alpha1.JobMapping{ - { - Schema: "public", - Table: "users", - Column: "user_id", - }, - } - - constraints := map[string][]*referenceKey{ - "name": { - { - Table: "public.orders", - Column: "buyer_id", - }, - }, - } - - resp, err := buildBranchCacheConfigs(cols, constraints, mockJobId, mockRunId, nil) - require.NoError(t, err) - require.Len(t, resp, 0) -} - -func Test_buildBranchCacheConfigs_missing_redis(t *testing.T) { - cols := []*mgmtv1alpha1.JobMapping{ - { - Schema: "public", - Table: "users", - Column: "user_id", - }, - } - - constraints := map[string][]*referenceKey{ - "user_id": { - { - Table: "public.orders", - Column: "buyer_id", - }, - }, - } - - _, err := buildBranchCacheConfigs(cols, constraints, mockJobId, mockRunId, nil) - require.Error(t, err) -} - -func Test_buildBranchCacheConfigs_success(t *testing.T) { - cols := []*mgmtv1alpha1.JobMapping{ - { - Schema: "public", - Table: "users", - Column: "user_id", - }, - { - Schema: "public", - Table: "users", - Column: "name", - }, - } - - constraints := map[string][]*referenceKey{ - "user_id": { - { - Table: "public.orders", - Column: "buyer_id", - }, - }, - } - redisConfig := &shared.RedisConfig{ - Url: "redis://localhost:6379", - Kind: "simple", - } - - resp, err := buildBranchCacheConfigs(cols, constraints, mockJobId, mockRunId, redisConfig) - - require.NoError(t, err) - require.Len(t, resp, 1) - require.Equal(t, *resp[0].RequestMap, `root = if this."user_id" == null { 
deleted() } else { this }`) - require.Equal(t, *resp[0].ResultMap, `root."user_id" = this`) -} - -func Test_buildBranchCacheConfigs_self_referencing(t *testing.T) { - cols := []*mgmtv1alpha1.JobMapping{ - { - Schema: "public", - Table: "users", - Column: "user_id", - }, - } - - constraints := map[string][]*referenceKey{ - "user_id": { - { - Table: "public.users", - Column: "other_id", - }, - }, - } - redisConfig := &shared.RedisConfig{ - Url: "redis://localhost:6379", - Kind: "simple", - } - - resp, err := buildBranchCacheConfigs(cols, constraints, mockJobId, mockRunId, redisConfig) - require.NoError(t, err) - require.Len(t, resp, 0) -} - -func Test_getPrimaryKeyDependencyMap(t *testing.T) { - tableDependencies := map[string][]*sqlmanager_shared.ForeignConstraint{ - "hr.countries": { - { - Columns: []string{"region_id"}, - NotNullable: []bool{true}, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: "hr.regions", - Columns: []string{"region_id"}, - }, - }, - }, - "hr.departments": { - { - Columns: []string{"location_id"}, - NotNullable: []bool{false}, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: "hr.locations", - Columns: []string{"location_id"}, - }, - }, - }, - "hr.dependents": { - { - Columns: []string{"employee_id"}, - NotNullable: []bool{true}, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: "hr.employees", - Columns: []string{"employee_id"}, - }, - }, - }, - "hr.employees": { - { - Columns: []string{"job_id"}, - NotNullable: []bool{true}, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: "hr.jobs", - Columns: []string{"job_id"}, - }, - }, - { - Columns: []string{"department_id"}, - NotNullable: []bool{false}, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: "hr.departments", - Columns: []string{"department_id"}, - }, - }, - { - Columns: []string{"manager_id"}, - NotNullable: []bool{false}, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: "hr.employees", - Columns: []string{"employee_id"}, - }, - }, - }, - "hr.locations": { - { - Columns: []string{"country_id"}, - NotNullable: []bool{true}, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: "hr.countries", - Columns: []string{"country_id"}, - }, - }, - }, - } - - expected := map[string]map[string][]*referenceKey{ - "hr.regions": { - "region_id": { - { - Table: "hr.countries", - Column: "region_id", - }, - }, - }, - "hr.locations": { - "location_id": { - { - Table: "hr.departments", - Column: "location_id", - }, - }, - }, - "hr.employees": { - "employee_id": { - { - Table: "hr.dependents", - Column: "employee_id", - }, - { - Table: "hr.employees", - Column: "manager_id", - }, - }, - }, - "hr.jobs": { - "job_id": { - { - Table: "hr.employees", - Column: "job_id", - }, - }, - }, - "hr.departments": { - "department_id": { - { - Table: "hr.employees", - Column: "department_id", - }, - }, - }, - "hr.countries": { - "country_id": { - { - Table: "hr.locations", - Column: "country_id", - }, - }, - }, - } - - actual := getPrimaryKeyDependencyMap(tableDependencies) - for table, depsMap := range expected { - actualDepsMap := actual[table] - require.NotNil(t, actualDepsMap) - for col, deps := range depsMap { - actualDeps := actualDepsMap[col] - require.ElementsMatch(t, deps, actualDeps) - } - } -} - -func Test_getPrimaryKeyDependencyMap_compositekeys(t *testing.T) { - tableDependencies := map[string][]*sqlmanager_shared.ForeignConstraint{ - "employees": { - { - Columns: []string{"department_id"}, - NotNullable: []bool{false}, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: "department", - Columns: 
[]string{"department_id"}, - }, - }, - }, - "projects": { - { - Columns: []string{"responsible_employee_id", "responsible_department_id"}, - NotNullable: []bool{true}, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: "employees", - Columns: []string{"employee_id", "department_id"}, - }, - }, - }, - } - - expected := map[string]map[string][]*referenceKey{ - "department": { - "department_id": { - { - Table: "employees", - Column: "department_id", - }, - }, - }, - "employees": { - "employee_id": {{ - Table: "projects", - Column: "responsible_employee_id", - }}, - "department_id": {{ - Table: "projects", - Column: "responsible_department_id", - }}, - }, - } - - actual := getPrimaryKeyDependencyMap(tableDependencies) - require.Equal(t, expected, actual) -} - func Test_buildPostTableSyncRunCtx(t *testing.T) { t.Run("Empty input", func(t *testing.T) { result := buildPostTableSyncRunCtx(nil, nil) @@ -1546,7 +21,7 @@ func Test_buildPostTableSyncRunCtx(t *testing.T) { }) t.Run("No statements generated", func(t *testing.T) { - benthosConfigs := []*BenthosConfigResponse{ + benthosConfigs := []*benthosbuilder.BenthosConfigResponse{ { Name: "config1", RunType: tabledependency.RunTypeUpdate, @@ -1565,7 +40,7 @@ func Test_buildPostTableSyncRunCtx(t *testing.T) { }) t.Run("Statements generated for Postgres and MSSQL", func(t *testing.T) { - benthosConfigs := []*BenthosConfigResponse{ + benthosConfigs := []*benthosbuilder.BenthosConfigResponse{ { Name: "config1", RunType: tabledependency.RunTypeInsert, @@ -1626,3 +101,87 @@ func Test_buildPostTableSyncRunCtx(t *testing.T) { require.Equal(t, expected, result, "Unexpected result when statements are generated") }) } + +func Test_BuildPgPostTableSyncStatement(t *testing.T) { + t.Run("Update run type", func(t *testing.T) { + bcUpdate := &benthosbuilder.BenthosConfigResponse{ + RunType: tabledependency.RunTypeUpdate, + } + resultUpdate := buildPgPostTableSyncStatement(bcUpdate) + require.Empty(t, resultUpdate, "Expected empty slice for Update run type") + }) + + t.Run("No columns need reset", func(t *testing.T) { + bcNoReset := &benthosbuilder.BenthosConfigResponse{ + RunType: tabledependency.RunTypeInsert, + ColumnDefaultProperties: map[string]*neosync_benthos.ColumnDefaultProperties{ + "col1": {NeedsReset: false, HasDefaultTransformer: false}, + "col2": {NeedsReset: false, HasDefaultTransformer: true}, + }, + TableSchema: "public", + TableName: "test_table", + } + resultNoReset := buildPgPostTableSyncStatement(bcNoReset) + require.Empty(t, resultNoReset, "Expected empty slice when no columns need reset") + }) + + t.Run("Some columns need reset", func(t *testing.T) { + bcSomeReset := &benthosbuilder.BenthosConfigResponse{ + RunType: tabledependency.RunTypeInsert, + ColumnDefaultProperties: map[string]*neosync_benthos.ColumnDefaultProperties{ + "col1": {NeedsReset: true, HasDefaultTransformer: false}, + "col2": {NeedsReset: false, HasDefaultTransformer: true}, + "col3": {NeedsReset: true, HasDefaultTransformer: false}, + }, + TableSchema: "public", + TableName: "test_table", + } + resultSomeReset := buildPgPostTableSyncStatement(bcSomeReset) + expectedSomeReset := []string{ + sqlmanager_postgres.BuildPgIdentityColumnResetCurrentSql("public", "test_table", "col1"), + sqlmanager_postgres.BuildPgIdentityColumnResetCurrentSql("public", "test_table", "col3"), + } + require.ElementsMatch(t, expectedSomeReset, resultSomeReset, "Unexpected result when some columns need reset") + }) +} + +func Test_BuildMssqlPostTableSyncStatement(t *testing.T) { + t.Run("Update run 
type", func(t *testing.T) { + bcUpdate := &benthosbuilder.BenthosConfigResponse{ + RunType: tabledependency.RunTypeUpdate, + } + resultUpdate := buildMssqlPostTableSyncStatement(bcUpdate) + require.Empty(t, resultUpdate, "Expected empty slice for Update run type") + }) + + t.Run("No columns need override", func(t *testing.T) { + bcNoOverride := &benthosbuilder.BenthosConfigResponse{ + RunType: tabledependency.RunTypeInsert, + ColumnDefaultProperties: map[string]*neosync_benthos.ColumnDefaultProperties{ + "col1": {NeedsOverride: false}, + "col2": {NeedsOverride: false}, + }, + TableSchema: "dbo", + TableName: "test_table", + } + resultNoOverride := buildMssqlPostTableSyncStatement(bcNoOverride) + require.Empty(t, resultNoOverride, "Expected empty slice when no columns need override") + }) + + t.Run("Some columns need override", func(t *testing.T) { + bcSomeOverride := &benthosbuilder.BenthosConfigResponse{ + RunType: tabledependency.RunTypeInsert, + ColumnDefaultProperties: map[string]*neosync_benthos.ColumnDefaultProperties{ + "col1": {NeedsOverride: true}, + "col2": {NeedsOverride: false}, + }, + TableSchema: "dbo", + TableName: "test_table", + } + resultSomeOverride := buildMssqlPostTableSyncStatement(bcSomeOverride) + expectedSomeOverride := []string{ + sqlmanager_mssql.BuildMssqlIdentityColumnResetCurrent("dbo", "test_table"), + } + require.Equal(t, expectedSomeOverride, resultSomeOverride, "Unexpected result when some columns need override") + }) +} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync.go b/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync.go deleted file mode 100644 index d98f19c44e..0000000000 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/sync.go +++ /dev/null @@ -1,1168 +0,0 @@ -package genbenthosconfigs_activity - -import ( - "context" - "errors" - "fmt" - "log/slog" - "slices" - "strings" - "time" - - mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" - "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect" - "github.com/nucleuscloud/neosync/backend/pkg/metrics" - "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager" - sqlmanager_mssql "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/mssql" - sqlmanager_postgres "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/postgres" - sqlmanager_shared "github.com/nucleuscloud/neosync/backend/pkg/sqlmanager/shared" - tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" - neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" - querybuilder "github.com/nucleuscloud/neosync/worker/pkg/query-builder2" - "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" -) - -type sqlSyncResp struct { - BenthosConfigs []*BenthosConfigResponse - primaryKeyToForeignKeysMap map[string]map[string][]*referenceKey - ColumnTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer - SchemaColumnInfoMap map[string]map[string]*sqlmanager_shared.ColumnInfo -} - -func (b *benthosBuilder) getSqlSyncBenthosConfigResponses( - ctx context.Context, - job *mgmtv1alpha1.Job, - slogger *slog.Logger, -) (*sqlSyncResp, error) { - sourceConnection, err := shared.GetJobSourceConnection(ctx, job.GetSource(), b.connclient) - if err != nil { - return nil, fmt.Errorf("unable to get connection by id: %w", err) - } - sourceConnectionType := shared.GetConnectionType(sourceConnection) - slogger = slogger.With( - "sourceConnectionType", 
sourceConnectionType, - ) - - sqlSourceOpts, err := getSqlJobSourceOpts(job.Source) - if err != nil { - return nil, err - } - var sourceTableOpts map[string]*sqlSourceTableOptions - if sqlSourceOpts != nil { - sourceTableOpts = groupSqlJobSourceOptionsByTable(sqlSourceOpts) - } - - db, err := b.sqlmanagerclient.NewPooledSqlDb(ctx, slogger, sourceConnection) - if err != nil { - return nil, fmt.Errorf("unable to create new sql db: %w", err) - } - defer db.Db.Close() - - groupedSchemas, err := db.Db.GetSchemaColumnMap(ctx) - if err != nil { - return nil, fmt.Errorf("unable to get database schema for connection: %w", err) - } - if !areMappingsSubsetOfSchemas(groupedSchemas, job.Mappings) { - return nil, errors.New(jobmappingSubsetErrMsg) - } - if sqlSourceOpts != nil && sqlSourceOpts.HaltOnNewColumnAddition && - shouldHaltOnSchemaAddition(groupedSchemas, job.Mappings) { - return nil, errors.New(haltOnSchemaAdditionErrMsg) - } - uniqueSchemas := shared.GetUniqueSchemasFromMappings(job.Mappings) - - tableConstraints, err := db.Db.GetTableConstraintsBySchema(ctx, uniqueSchemas) - if err != nil { - return nil, fmt.Errorf("unable to retrieve database table constraints: %w", err) - } - - foreignKeysMap, err := mergeVirtualForeignKeys(tableConstraints.ForeignKeyConstraints, job.GetVirtualForeignKeys(), groupedSchemas) - if err != nil { - return nil, err - } - - slogger.Info(fmt.Sprintf("found %d foreign key constraints for database", getMapValuesCount(tableConstraints.ForeignKeyConstraints))) - slogger.Info(fmt.Sprintf("found %d primary key constraints for database", getMapValuesCount(tableConstraints.PrimaryKeyConstraints))) - - groupedMappings := groupMappingsByTable(job.Mappings) - groupedTableMapping := getTableMappingsMap(groupedMappings) - colTransformerMap := getColumnTransformerMap(groupedTableMapping) // schema.table -> column -> transformer - filteredForeignKeysMap := filterForeignKeysMap(colTransformerMap, foreignKeysMap) - - tableSubsetMap := buildTableSubsetMap(sourceTableOpts, groupedTableMapping) - tableColMap := getTableColMapFromMappings(groupedMappings) - runConfigs, err := tabledependency.GetRunConfigs(filteredForeignKeysMap, tableSubsetMap, tableConstraints.PrimaryKeyConstraints, tableColMap) - if err != nil { - return nil, err - } - primaryKeyToForeignKeysMap := getPrimaryKeyDependencyMap(filteredForeignKeysMap) - - tableRunTypeQueryMap, err := querybuilder.BuildSelectQueryMap(db.Driver, filteredForeignKeysMap, runConfigs, sqlSourceOpts.SubsetByForeignKeyConstraints, groupedSchemas) - if err != nil { - return nil, fmt.Errorf("unable to build select queries: %w", err) - } - - sourceResponses, err := buildBenthosSqlSourceConfigResponses(slogger, ctx, b.transformerclient, groupedTableMapping, runConfigs, sourceConnection.Id, db.Driver, tableRunTypeQueryMap, groupedSchemas, filteredForeignKeysMap, colTransformerMap, b.jobId, b.runId, b.redisConfig, primaryKeyToForeignKeysMap, sourceConnectionType) - if err != nil { - return nil, fmt.Errorf("unable to build benthos sql source config responses: %w", err) - } - - return &sqlSyncResp{ - BenthosConfigs: sourceResponses, - primaryKeyToForeignKeysMap: primaryKeyToForeignKeysMap, - ColumnTransformerMap: colTransformerMap, - SchemaColumnInfoMap: groupedSchemas, - }, nil -} - -func filterForeignKeysMap( - colTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer, - foreignKeysMap map[string][]*sqlmanager_shared.ForeignConstraint, -) map[string][]*sqlmanager_shared.ForeignConstraint { - newFkMap := 
make(map[string][]*sqlmanager_shared.ForeignConstraint) - - for table, fks := range foreignKeysMap { - cols, ok := colTransformerMap[table] - if !ok { - continue - } - for _, fk := range fks { - newFk := &sqlmanager_shared.ForeignConstraint{ - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: fk.ForeignKey.Table, - }, - } - for i, c := range fk.Columns { - t, ok := cols[c] - if !fk.NotNullable[i] && (!ok || isNullJobMappingTransformer(t)) { - continue - } - - newFk.Columns = append(newFk.Columns, c) - newFk.NotNullable = append(newFk.NotNullable, fk.NotNullable[i]) - newFk.ForeignKey.Columns = append(newFk.ForeignKey.Columns, fk.ForeignKey.Columns[i]) - } - - if len(newFk.Columns) > 0 { - newFkMap[table] = append(newFkMap[table], newFk) - } - } - } - return newFkMap -} - -func isNullJobMappingTransformer(t *mgmtv1alpha1.JobMappingTransformer) bool { - switch t.GetConfig().GetConfig().(type) { - case *mgmtv1alpha1.TransformerConfig_Nullconfig: - return true - default: - return false - } -} - -func isDefaultJobMappingTransformer(t *mgmtv1alpha1.JobMappingTransformer) bool { - switch t.GetConfig().GetConfig().(type) { - case *mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig: - return true - default: - return false - } -} - -func mergeVirtualForeignKeys( - dbForeignKeys map[string][]*sqlmanager_shared.ForeignConstraint, - virtualForeignKeys []*mgmtv1alpha1.VirtualForeignConstraint, - colInfoMap map[string]map[string]*sqlmanager_shared.ColumnInfo, -) (map[string][]*sqlmanager_shared.ForeignConstraint, error) { - fks := map[string][]*sqlmanager_shared.ForeignConstraint{} - - for table, fk := range dbForeignKeys { - fks[table] = fk - } - - for _, fk := range virtualForeignKeys { - tn := sqlmanager_shared.BuildTable(fk.Schema, fk.Table) - fkTable := sqlmanager_shared.BuildTable(fk.GetForeignKey().Schema, fk.GetForeignKey().Table) - notNullable := []bool{} - for _, c := range fk.GetColumns() { - colMap, ok := colInfoMap[tn] - if !ok { - return nil, fmt.Errorf("virtual foreign key source table not found: %s", tn) - } - colInfo, ok := colMap[c] - if !ok { - return nil, fmt.Errorf("virtual foreign key source column not found: %s.%s", tn, c) - } - notNullable = append(notNullable, !colInfo.IsNullable) - } - fks[tn] = append(fks[tn], &sqlmanager_shared.ForeignConstraint{ - Columns: fk.GetColumns(), - NotNullable: notNullable, - ForeignKey: &sqlmanager_shared.ForeignKey{ - Table: fkTable, - Columns: fk.GetForeignKey().GetColumns(), - }, - }) - } - - return fks, nil -} - -func buildPgPostTableSyncStatement(bc *BenthosConfigResponse) []string { - statements := []string{} - if bc.RunType == tabledependency.RunTypeUpdate { - return statements - } - colDefaultProps := bc.ColumnDefaultProperties - for colName, p := range colDefaultProps { - if p.NeedsReset && !p.HasDefaultTransformer { - // resets sequences and identities - resetSql := sqlmanager_postgres.BuildPgIdentityColumnResetCurrentSql(bc.TableSchema, bc.TableName, colName) - statements = append(statements, resetSql) - } - } - return statements -} - -func buildMssqlPostTableSyncStatement(bc *BenthosConfigResponse) []string { - statements := []string{} - if bc.RunType == tabledependency.RunTypeUpdate { - return statements - } - colDefaultProps := bc.ColumnDefaultProperties - for _, p := range colDefaultProps { - if p.NeedsOverride { - // reset identity - resetSql := sqlmanager_mssql.BuildMssqlIdentityColumnResetCurrent(bc.TableSchema, bc.TableName) - statements = append(statements, resetSql) - } - } - return statements -} - -func 
buildBenthosSqlSourceConfigResponses( - slogger *slog.Logger, - ctx context.Context, - transformerclient mgmtv1alpha1connect.TransformersServiceClient, - groupedTableMapping map[string]*tableMapping, - runconfigs []*tabledependency.RunConfig, - dsnConnectionId string, - driver string, - tableRunTypeQueryMap map[string]map[tabledependency.RunType]string, - groupedColumnInfo map[string]map[string]*sqlmanager_shared.ColumnInfo, - tableDependencies map[string][]*sqlmanager_shared.ForeignConstraint, - colTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer, - jobId, runId string, - redisConfig *shared.RedisConfig, - primaryKeyToForeignKeysMap map[string]map[string][]*referenceKey, - sourceConnectionType string, -) ([]*BenthosConfigResponse, error) { - responses := []*BenthosConfigResponse{} - - // map of table constraints that have transformers - transformedForeignKeyToSourceMap := getTransformedFksMap(tableDependencies, colTransformerMap) - - for _, config := range runconfigs { - mappings, ok := groupedTableMapping[config.Table()] - if !ok { - return nil, fmt.Errorf("missing column mappings for table: %s", config.Table()) - } - query, ok := tableRunTypeQueryMap[config.Table()][config.RunType()] - if !ok { - return nil, fmt.Errorf("select query not found for table: %s runType: %s", config.Table(), config.RunType()) - } - bc := &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - PooledSqlRaw: &neosync_benthos.InputPooledSqlRaw{ - Driver: driver, - Dsn: "${SOURCE_CONNECTION_DSN}", - - Query: query, - }, - }, - }, - Pipeline: &neosync_benthos.PipelineConfig{ - Threads: -1, - Processors: []neosync_benthos.ProcessorConfig{}, - }, - Output: &neosync_benthos.OutputConfig{ - Outputs: neosync_benthos.Outputs{ - Broker: &neosync_benthos.OutputBrokerConfig{ - Pattern: "fan_out", - Outputs: []neosync_benthos.Outputs{}, - }, - }, - }, - }, - } - - columnForeignKeysMap := primaryKeyToForeignKeysMap[config.Table()] - transformedFktoPkMap := transformedForeignKeyToSourceMap[config.Table()] - colInfoMap := groupedColumnInfo[config.Table()] - tableColTransformers := colTransformerMap[config.Table()] - - processorConfigs, err := buildProcessorConfigsByRunType( - ctx, - transformerclient, - config, - columnForeignKeysMap, - transformedFktoPkMap, - jobId, - runId, - redisConfig, - mappings.Mappings, - colInfoMap, - nil, - []string{}, - ) - if err != nil { - return nil, err - } - for _, pc := range processorConfigs { - bc.StreamConfig.Pipeline.Processors = append(bc.StreamConfig.Pipeline.Processors, *pc) - } - - columnDefaultProperties, err := getColumnDefaultProperties(slogger, driver, config.InsertColumns(), colInfoMap, tableColTransformers) - if err != nil { - return nil, err - } - - responses = append(responses, &BenthosConfigResponse{ - Name: fmt.Sprintf("%s.%s", config.Table(), config.RunType()), - Config: bc, - DependsOn: config.DependsOn(), - RedisDependsOn: buildRedisDependsOnMap(transformedFktoPkMap, config), - RunType: config.RunType(), - - BenthosDsns: []*shared.BenthosDsn{{ConnectionId: dsnConnectionId, EnvVarKey: "SOURCE_CONNECTION_DSN"}}, - - TableSchema: mappings.Schema, - TableName: mappings.Table, - Columns: config.InsertColumns(), - ColumnDefaultProperties: columnDefaultProperties, - primaryKeys: config.PrimaryKeys(), - - SourceConnectionType: sourceConnectionType, - metriclabels: metrics.MetricLabels{ - metrics.NewEqLabel(metrics.TableSchemaLabel, mappings.Schema), - 
metrics.NewEqLabel(metrics.TableNameLabel, mappings.Table), - metrics.NewEqLabel(metrics.JobTypeLabel, "sync"), - }, - }) - } - - return responses, nil -} - -func getColumnDefaultProperties( - slogger *slog.Logger, - driver string, - cols []string, - colInfo map[string]*sqlmanager_shared.ColumnInfo, - colTransformers map[string]*mgmtv1alpha1.JobMappingTransformer, -) (map[string]*neosync_benthos.ColumnDefaultProperties, error) { - colDefaults := map[string]*neosync_benthos.ColumnDefaultProperties{} - for _, cName := range cols { - info, ok := colInfo[cName] - if !ok { - return nil, fmt.Errorf("column default type missing. column: %s", cName) - } - needsOverride, needsReset, err := sqlmanager.GetColumnOverrideAndResetProperties(driver, info) - if err != nil { - slogger.Error("unable to determine SQL column default flags", "error", err, "column", cName) - return nil, err - } - - jmTransformer, ok := colTransformers[cName] - if !ok { - return nil, fmt.Errorf("transformer missing for column: %s", cName) - } - var hasDefaultTransformer bool - if jmTransformer != nil && isDefaultJobMappingTransformer(jmTransformer) { - hasDefaultTransformer = true - } - if !needsReset && !needsOverride && !hasDefaultTransformer { - continue - } - colDefaults[cName] = &neosync_benthos.ColumnDefaultProperties{ - NeedsReset: needsReset, - NeedsOverride: needsOverride, - HasDefaultTransformer: hasDefaultTransformer, - } - } - return colDefaults, nil -} - -func buildRedisDependsOnMap(transformedForeignKeyToSourceMap map[string][]*referenceKey, runconfig *tabledependency.RunConfig) map[string][]string { - redisDependsOnMap := map[string][]string{} - for col, fks := range transformedForeignKeyToSourceMap { - if !slices.Contains(runconfig.InsertColumns(), col) { - continue - } - for _, fk := range fks { - if _, exists := redisDependsOnMap[fk.Table]; !exists { - redisDependsOnMap[fk.Table] = []string{} - } - redisDependsOnMap[fk.Table] = append(redisDependsOnMap[fk.Table], fk.Column) - } - } - if runconfig.RunType() == tabledependency.RunTypeUpdate && len(redisDependsOnMap) != 0 { - redisDependsOnMap[runconfig.Table()] = runconfig.PrimaryKeys() - } - return redisDependsOnMap -} - -func getTransformedFksMap( - tabledependencies map[string][]*sqlmanager_shared.ForeignConstraint, - colTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer, -) map[string]map[string][]*referenceKey { - foreignKeyToSourceMap := buildForeignKeySourceMap(tabledependencies) - // filter this list by table constraints that has transformer - transformedForeignKeyToSourceMap := map[string]map[string][]*referenceKey{} // schema.table -> column -> foreignKey - for table, constraints := range foreignKeyToSourceMap { - _, ok := transformedForeignKeyToSourceMap[table] - if !ok { - transformedForeignKeyToSourceMap[table] = map[string][]*referenceKey{} - } - for col, tc := range constraints { - // only add constraint if foreign key has transformer - transformer, transformerOk := colTransformerMap[tc.Table][tc.Column] - if transformerOk && shouldProcessStrict(transformer) { - transformedForeignKeyToSourceMap[table][col] = append(transformedForeignKeyToSourceMap[table][col], tc) - } - } - } - return transformedForeignKeyToSourceMap -} - -func buildProcessorConfigsByRunType( - ctx context.Context, - transformerclient mgmtv1alpha1connect.TransformersServiceClient, - config *tabledependency.RunConfig, - columnForeignKeysMap map[string][]*referenceKey, - transformedFktoPkMap map[string][]*referenceKey, - jobId, runId string, - redisConfig 
*shared.RedisConfig, - mappings []*mgmtv1alpha1.JobMapping, - columnInfoMap map[string]*sqlmanager_shared.ColumnInfo, - jobSourceOptions *mgmtv1alpha1.JobSourceOptions, - mappedKeys []string, -) ([]*neosync_benthos.ProcessorConfig, error) { - if config.RunType() == tabledependency.RunTypeUpdate { - // sql update processor configs - processorConfigs, err := buildSqlUpdateProcessorConfigs(config, redisConfig, jobId, runId, transformedFktoPkMap) - if err != nil { - return nil, err - } - return processorConfigs, nil - } else { - // sql insert processor configs - fkSourceCols := []string{} - for col := range columnForeignKeysMap { - fkSourceCols = append(fkSourceCols, col) - } - processorConfigs, err := buildProcessorConfigs( - ctx, - transformerclient, - mappings, - columnInfoMap, - transformedFktoPkMap, - fkSourceCols, - jobId, - runId, - redisConfig, - config, - jobSourceOptions, - mappedKeys, - ) - if err != nil { - return nil, err - } - return processorConfigs, nil - } -} - -func (b *benthosBuilder) getSqlSyncBenthosOutput( - driver string, - destination *mgmtv1alpha1.JobDestination, - benthosConfig *BenthosConfigResponse, - dsn string, - primaryKeyToForeignKeysMap map[string]map[string][]*referenceKey, - colTransformerMap map[string]map[string]*mgmtv1alpha1.JobMappingTransformer, - colInfoMap map[string]*sqlmanager_shared.ColumnInfo, -) ([]neosync_benthos.Outputs, error) { - outputs := []neosync_benthos.Outputs{} - tableKey := neosync_benthos.BuildBenthosTable(benthosConfig.TableSchema, benthosConfig.TableName) - destOpts := getDestinationOptions(destination) - if benthosConfig.RunType == tabledependency.RunTypeUpdate { - args := benthosConfig.Columns - args = append(args, benthosConfig.primaryKeys...) - outputs = append(outputs, neosync_benthos.Outputs{ - Fallback: []neosync_benthos.Outputs{ - { - PooledSqlUpdate: &neosync_benthos.PooledSqlUpdate{ - Driver: driver, - Dsn: dsn, - - Schema: benthosConfig.TableSchema, - Table: benthosConfig.TableName, - Columns: benthosConfig.Columns, - SkipForeignKeyViolations: destOpts.SkipForeignKeyViolations, - WhereColumns: benthosConfig.primaryKeys, - ArgsMapping: buildPlainInsertArgs(args), - - Batching: &neosync_benthos.Batching{ - Period: "5s", - Count: 100, - }, - }, - }, - // kills activity depending on error - {Error: &neosync_benthos.ErrorOutputConfig{ - ErrorMsg: `${! meta("fallback_error")}`, - Batching: &neosync_benthos.Batching{ - Period: "5s", - Count: 100, - }, - }}, - }, - }) - } else { - // adds redis hash output for transformed primary keys - constraints := primaryKeyToForeignKeysMap[tableKey] - for col := range constraints { - transformer := colTransformerMap[tableKey][col] - if shouldProcessStrict(transformer) { - if b.redisConfig == nil { - return nil, fmt.Errorf("missing redis config. 
this operation requires redis") - } - hashedKey := neosync_benthos.HashBenthosCacheKey(b.jobId, b.runId, tableKey, col) - outputs = append(outputs, neosync_benthos.Outputs{ - RedisHashOutput: &neosync_benthos.RedisHashOutputConfig{ - Url: b.redisConfig.Url, - Key: hashedKey, - FieldsMapping: fmt.Sprintf(`root = {meta(%q): json(%q)}`, hashPrimaryKeyMetaKey(benthosConfig.TableSchema, benthosConfig.TableName, col), col), // map of original value to transformed value - WalkMetadata: false, - WalkJsonObject: false, - Kind: &b.redisConfig.Kind, - Master: b.redisConfig.Master, - Tls: shared.BuildBenthosRedisTlsConfig(b.redisConfig), - }, - }) - benthosConfig.RedisConfig = append(benthosConfig.RedisConfig, &BenthosRedisConfig{ - Key: hashedKey, - Table: tableKey, - Column: col, - }) - } - } - - columnTypes := []string{} - for _, c := range benthosConfig.Columns { - colType, ok := colInfoMap[c] - if ok { - columnTypes = append(columnTypes, colType.DataType) - } else { - columnTypes = append(columnTypes, "") - } - } - - prefix, suffix := getInsertPrefixAndSuffix(driver, benthosConfig.TableSchema, benthosConfig.TableName, benthosConfig.ColumnDefaultProperties) - outputs = append(outputs, neosync_benthos.Outputs{ - Fallback: []neosync_benthos.Outputs{ - { - PooledSqlInsert: &neosync_benthos.PooledSqlInsert{ - Driver: driver, - Dsn: dsn, - - Schema: benthosConfig.TableSchema, - Table: benthosConfig.TableName, - Columns: benthosConfig.Columns, - ColumnsDataTypes: columnTypes, - ColumnDefaultProperties: benthosConfig.ColumnDefaultProperties, - OnConflictDoNothing: destOpts.OnConflictDoNothing, - SkipForeignKeyViolations: destOpts.SkipForeignKeyViolations, - TruncateOnRetry: destOpts.Truncate, - ArgsMapping: buildPlainInsertArgs(benthosConfig.Columns), - Prefix: prefix, - Suffix: suffix, - - Batching: &neosync_benthos.Batching{ - Period: "5s", - Count: 100, - }, - }, - }, - // kills activity depending on error - {Error: &neosync_benthos.ErrorOutputConfig{ - ErrorMsg: `${! 
meta("fallback_error")}`, - Batching: &neosync_benthos.Batching{ - Period: "5s", - Count: 100, - }, - }}, - }, - }) - } - - return outputs, nil -} - -func getInsertPrefixAndSuffix( - driver, schema, table string, - columnDefaultProperties map[string]*neosync_benthos.ColumnDefaultProperties, -) (prefix, suffix *string) { - var pre, suff *string - if len(columnDefaultProperties) == 0 { - return pre, suff - } - switch driver { - case sqlmanager_shared.MssqlDriver: - if hasPassthroughIdentityColumn(columnDefaultProperties) { - enableIdentityInsert := true - p := sqlmanager_mssql.BuildMssqlSetIdentityInsertStatement(schema, table, enableIdentityInsert) - pre = &p - s := sqlmanager_mssql.BuildMssqlSetIdentityInsertStatement(schema, table, !enableIdentityInsert) - suff = &s - } - return pre, suff - default: - return pre, suff - } -} - -func hasPassthroughIdentityColumn(columnDefaultProperties map[string]*neosync_benthos.ColumnDefaultProperties) bool { - for _, d := range columnDefaultProperties { - if d.NeedsOverride && d.NeedsReset && !d.HasDefaultTransformer { - return true - } - } - return false -} - -func (b *benthosBuilder) getAwsS3SyncBenthosOutput( - connection *mgmtv1alpha1.ConnectionConfig_AwsS3Config, - benthosConfig *BenthosConfigResponse, - workflowId string, - destinationOptions *mgmtv1alpha1.AwsS3DestinationConnectionOptions, -) ([]neosync_benthos.Outputs, error) { - outputs := []neosync_benthos.Outputs{} - - s3pathpieces := []string{} - if connection.AwsS3Config.PathPrefix != nil && *connection.AwsS3Config.PathPrefix != "" { - s3pathpieces = append(s3pathpieces, strings.Trim(*connection.AwsS3Config.PathPrefix, "/")) - } - - s3pathpieces = append( - s3pathpieces, - "workflows", - workflowId, - "activities", - neosync_benthos.BuildBenthosTable(benthosConfig.TableSchema, benthosConfig.TableName), - "data", - `records-${!count("files")}-${!timestamp_unix_nano()}.jsonl.gz`, - ) - - maxInFlight := 64 - if destinationOptions.GetMaxInFlight() > 0 { - maxInFlight = int(destinationOptions.GetMaxInFlight()) - } - - batchCount := 100 - batchPeriod := "5s" - batchConfig := destinationOptions.GetBatch() - if batchConfig != nil { - batchCount = int(batchConfig.GetCount()) - - if batchConfig.GetPeriod() != "" { - _, err := time.ParseDuration(batchConfig.GetPeriod()) - if err != nil { - return nil, fmt.Errorf("unable to parse batch period for s3 destination config: %w", err) - } - } - batchPeriod = batchConfig.GetPeriod() - } - - if batchCount == 0 && batchPeriod == "" { - return nil, fmt.Errorf("must have at least one batch policy configured. 
Cannot disable both period and count") - } - - timeout := "" - if destinationOptions.GetTimeout() != "" { - _, err := time.ParseDuration(destinationOptions.GetTimeout()) - if err != nil { - return nil, fmt.Errorf("unable to parse timeout for s3 destination config: %w", err) - } - timeout = destinationOptions.GetTimeout() - } - - storageClass := "" - if destinationOptions.GetStorageClass() != mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_UNSPECIFIED { - storageClass = convertToS3StorageClass(destinationOptions.GetStorageClass()).String() - } - - outputs = append(outputs, neosync_benthos.Outputs{ - Fallback: []neosync_benthos.Outputs{ - { - AwsS3: &neosync_benthos.AwsS3Insert{ - Bucket: connection.AwsS3Config.Bucket, - MaxInFlight: maxInFlight, - Timeout: timeout, - StorageClass: storageClass, - Path: strings.Join(s3pathpieces, "/"), - ContentType: "application/gzip", - Batching: &neosync_benthos.Batching{ - Count: batchCount, - Period: batchPeriod, - Processors: []*neosync_benthos.BatchProcessor{ - {Archive: &neosync_benthos.ArchiveProcessor{Format: "lines"}}, - {Compress: &neosync_benthos.CompressProcessor{Algorithm: "gzip"}}, - }, - }, - Credentials: buildBenthosS3Credentials(connection.AwsS3Config.Credentials), - Region: connection.AwsS3Config.GetRegion(), - Endpoint: connection.AwsS3Config.GetEndpoint(), - }, - }, - // kills activity depending on error - {Error: &neosync_benthos.ErrorOutputConfig{ - ErrorMsg: `${! meta("fallback_error")}`, - Batching: &neosync_benthos.Batching{ - Period: batchPeriod, - Count: batchCount, - }, - }}, - }, - }) - return outputs, nil -} - -type S3StorageClass int - -const ( - S3StorageClass_UNSPECIFIED S3StorageClass = iota - S3StorageClass_STANDARD - S3StorageClass_REDUCED_REDUNDANCY - S3StorageClass_GLACIER - S3StorageClass_STANDARD_IA - S3StorageClass_ONEZONE_IA - S3StorageClass_INTELLIGENT_TIERING - S3StorageClass_DEEP_ARCHIVE -) - -func (s S3StorageClass) String() string { - return [...]string{ - "STORAGE_CLASS_UNSPECIFIED", - "STANDARD", - "REDUCED_REDUNDANCY", - "GLACIER", - "STANDARD_IA", - "ONEZONE_IA", - "INTELLIGENT_TIERING", - "DEEP_ARCHIVE", - }[s] -} - -func convertToS3StorageClass(protoStorageClass mgmtv1alpha1.AwsS3DestinationConnectionOptions_StorageClass) S3StorageClass { - switch protoStorageClass { - case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_STANDARD: - return S3StorageClass_STANDARD - case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_REDUCED_REDUNDANCY: - return S3StorageClass_REDUCED_REDUNDANCY - case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_GLACIER: - return S3StorageClass_GLACIER - case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_STANDARD_IA: - return S3StorageClass_STANDARD_IA - case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_ONEZONE_IA: - return S3StorageClass_ONEZONE_IA - case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_INTELLIGENT_TIERING: - return S3StorageClass_INTELLIGENT_TIERING - case mgmtv1alpha1.AwsS3DestinationConnectionOptions_STORAGE_CLASS_DEEP_ARCHIVE: - return S3StorageClass_DEEP_ARCHIVE - default: - return S3StorageClass_UNSPECIFIED - } -} - -func (b *benthosBuilder) getGcpCloudStorageSyncBenthosOutput( - connection *mgmtv1alpha1.ConnectionConfig_GcpCloudstorageConfig, - benthosConfig *BenthosConfigResponse, - workflowId string, -) []neosync_benthos.Outputs { - outputs := []neosync_benthos.Outputs{} - - pathpieces := []string{} - if connection.GcpCloudstorageConfig.GetPathPrefix() != "" { - 
pathpieces = append(pathpieces, strings.Trim(connection.GcpCloudstorageConfig.GetPathPrefix(), "/")) - } - - pathpieces = append( - pathpieces, - "workflows", - workflowId, - "activities", - neosync_benthos.BuildBenthosTable(benthosConfig.TableSchema, benthosConfig.TableName), - "data", - `${!count("files")}.txt.gz`, - ) - - outputs = append(outputs, neosync_benthos.Outputs{ - Fallback: []neosync_benthos.Outputs{ - { - GcpCloudStorage: &neosync_benthos.GcpCloudStorageOutput{ - Bucket: connection.GcpCloudstorageConfig.GetBucket(), - MaxInFlight: 64, - Path: strings.Join(pathpieces, "/"), - ContentType: shared.Ptr("txt/plain"), - ContentEncoding: shared.Ptr("gzip"), - Batching: &neosync_benthos.Batching{ - Count: 100, - Period: "5s", - Processors: []*neosync_benthos.BatchProcessor{ - {Archive: &neosync_benthos.ArchiveProcessor{Format: "lines"}}, - {Compress: &neosync_benthos.CompressProcessor{Algorithm: "gzip"}}, - }, - }, - }, - }, - // kills activity depending on error - {Error: &neosync_benthos.ErrorOutputConfig{ - ErrorMsg: `${! meta("fallback_error")}`, - Batching: &neosync_benthos.Batching{ - Period: "5s", - Count: 100, - }, - }}, - }, - }) - return outputs -} - -func getTableColMapFromMappings(mappings []*tableMapping) map[string][]string { - tableColMap := map[string][]string{} - for _, m := range mappings { - cols := []string{} - for _, c := range m.Mappings { - cols = append(cols, c.Column) - } - tn := sqlmanager_shared.BuildTable(m.Schema, m.Table) - tableColMap[tn] = cols - } - return tableColMap -} - -type referenceKey struct { - Table string - Column string -} - -// map of table primary key cols to foreign key cols -func getPrimaryKeyDependencyMap(tableDependencies map[string][]*sqlmanager_shared.ForeignConstraint) map[string]map[string][]*referenceKey { - tc := map[string]map[string][]*referenceKey{} // schema.table -> column -> ForeignKey - for table, constraints := range tableDependencies { - for _, c := range constraints { - _, ok := tc[c.ForeignKey.Table] - if !ok { - tc[c.ForeignKey.Table] = map[string][]*referenceKey{} - } - for idx, col := range c.ForeignKey.Columns { - tc[c.ForeignKey.Table][col] = append(tc[c.ForeignKey.Table][col], &referenceKey{ - Table: table, - Column: c.Columns[idx], - }) - } - } - } - return tc -} - -func findTopForeignKeySource(tableName, col string, tableDependencies map[string][]*sqlmanager_shared.ForeignConstraint) *referenceKey { - // Add the foreign key dependencies of the current table - if foreignKeys, ok := tableDependencies[tableName]; ok { - for _, fk := range foreignKeys { - for idx, c := range fk.Columns { - if c == col { - // Recursively add dependent tables and their foreign keys - return findTopForeignKeySource(fk.ForeignKey.Table, fk.ForeignKey.Columns[idx], tableDependencies) - } - } - } - } - return &referenceKey{ - Table: tableName, - Column: col, - } -} - -// builds schema.table -> FK column -> PK schema table column -// find top level primary key column if foreign keys are nested -func buildForeignKeySourceMap(tableDeps map[string][]*sqlmanager_shared.ForeignConstraint) map[string]map[string]*referenceKey { - outputMap := map[string]map[string]*referenceKey{} - for tableName, constraints := range tableDeps { - if _, ok := outputMap[tableName]; !ok { - outputMap[tableName] = map[string]*referenceKey{} - } - for _, con := range constraints { - for _, col := range con.Columns { - fk := findTopForeignKeySource(tableName, col, tableDeps) - outputMap[tableName][col] = fk - } - } - } - return outputMap -} - -type 
destinationOptions struct { - OnConflictDoNothing bool - Truncate bool - TruncateCascade bool - SkipForeignKeyViolations bool -} - -func getDestinationOptions(dest *mgmtv1alpha1.JobDestination) *destinationOptions { - if dest == nil || dest.Options == nil || dest.Options.Config == nil { - return &destinationOptions{} - } - switch config := dest.Options.Config.(type) { - case *mgmtv1alpha1.JobDestinationOptions_PostgresOptions: - return &destinationOptions{ - OnConflictDoNothing: config.PostgresOptions.GetOnConflict().GetDoNothing(), - Truncate: config.PostgresOptions.GetTruncateTable().GetTruncateBeforeInsert(), - TruncateCascade: config.PostgresOptions.GetTruncateTable().GetCascade(), - SkipForeignKeyViolations: config.PostgresOptions.GetSkipForeignKeyViolations(), - } - case *mgmtv1alpha1.JobDestinationOptions_MysqlOptions: - return &destinationOptions{ - OnConflictDoNothing: config.MysqlOptions.GetOnConflict().GetDoNothing(), - Truncate: config.MysqlOptions.GetTruncateTable().GetTruncateBeforeInsert(), - SkipForeignKeyViolations: config.MysqlOptions.GetSkipForeignKeyViolations(), - } - case *mgmtv1alpha1.JobDestinationOptions_MssqlOptions: - return &destinationOptions{ - SkipForeignKeyViolations: config.MssqlOptions.GetSkipForeignKeyViolations(), - } - default: - return &destinationOptions{} - } -} - -type sqlJobSourceOpts struct { - HaltOnNewColumnAddition bool - SubsetByForeignKeyConstraints bool - SchemaOpt []*schemaOptions -} -type schemaOptions struct { - Schema string - Tables []*tableOptions -} -type tableOptions struct { - Table string - WhereClause *string -} - -func getSqlJobSourceOpts( - source *mgmtv1alpha1.JobSource, -) (*sqlJobSourceOpts, error) { - switch jobSourceConfig := source.GetOptions().GetConfig().(type) { - case *mgmtv1alpha1.JobSourceOptions_Postgres: - if jobSourceConfig.Postgres == nil { - return nil, nil - } - schemaOpt := []*schemaOptions{} - for _, opt := range jobSourceConfig.Postgres.Schemas { - tableOpts := []*tableOptions{} - for _, t := range opt.GetTables() { - tableOpts = append(tableOpts, &tableOptions{ - Table: t.Table, - WhereClause: t.WhereClause, - }) - } - schemaOpt = append(schemaOpt, &schemaOptions{ - Schema: opt.GetSchema(), - Tables: tableOpts, - }) - } - return &sqlJobSourceOpts{ - HaltOnNewColumnAddition: jobSourceConfig.Postgres.HaltOnNewColumnAddition, - SubsetByForeignKeyConstraints: jobSourceConfig.Postgres.SubsetByForeignKeyConstraints, - SchemaOpt: schemaOpt, - }, nil - case *mgmtv1alpha1.JobSourceOptions_Mysql: - if jobSourceConfig.Mysql == nil { - return nil, nil - } - schemaOpt := []*schemaOptions{} - for _, opt := range jobSourceConfig.Mysql.Schemas { - tableOpts := []*tableOptions{} - for _, t := range opt.GetTables() { - tableOpts = append(tableOpts, &tableOptions{ - Table: t.Table, - WhereClause: t.WhereClause, - }) - } - schemaOpt = append(schemaOpt, &schemaOptions{ - Schema: opt.GetSchema(), - Tables: tableOpts, - }) - } - return &sqlJobSourceOpts{ - HaltOnNewColumnAddition: jobSourceConfig.Mysql.HaltOnNewColumnAddition, - SubsetByForeignKeyConstraints: jobSourceConfig.Mysql.SubsetByForeignKeyConstraints, - SchemaOpt: schemaOpt, - }, nil - case *mgmtv1alpha1.JobSourceOptions_Mssql: - if jobSourceConfig.Mssql == nil { - return nil, nil - } - schemaOpt := []*schemaOptions{} - for _, opt := range jobSourceConfig.Mssql.Schemas { - tableOpts := []*tableOptions{} - for _, t := range opt.GetTables() { - tableOpts = append(tableOpts, &tableOptions{ - Table: t.Table, - WhereClause: t.WhereClause, - }) - } - schemaOpt = 
append(schemaOpt, &schemaOptions{ - Schema: opt.GetSchema(), - Tables: tableOpts, - }) - } - return &sqlJobSourceOpts{ - HaltOnNewColumnAddition: jobSourceConfig.Mssql.HaltOnNewColumnAddition, - SubsetByForeignKeyConstraints: jobSourceConfig.Mssql.SubsetByForeignKeyConstraints, - SchemaOpt: schemaOpt, - }, nil - default: - return nil, fmt.Errorf("unsupported job source options type for sql job source: %T", jobSourceConfig) - } -} - -func buildBenthosS3Credentials(mgmtCreds *mgmtv1alpha1.AwsS3Credentials) *neosync_benthos.AwsCredentials { - if mgmtCreds == nil { - return nil - } - creds := &neosync_benthos.AwsCredentials{} - if mgmtCreds.Profile != nil { - creds.Profile = *mgmtCreds.Profile - } - if mgmtCreds.AccessKeyId != nil { - creds.Id = *mgmtCreds.AccessKeyId - } - if mgmtCreds.SecretAccessKey != nil { - creds.Secret = *mgmtCreds.SecretAccessKey - } - if mgmtCreds.SessionToken != nil { - creds.Token = *mgmtCreds.SessionToken - } - if mgmtCreds.FromEc2Role != nil { - creds.FromEc2Role = *mgmtCreds.FromEc2Role - } - if mgmtCreds.RoleArn != nil { - creds.Role = *mgmtCreds.RoleArn - } - if mgmtCreds.RoleExternalId != nil { - creds.RoleExternalId = *mgmtCreds.RoleExternalId - } - - return creds -} - -func areMappingsSubsetOfSchemas( - groupedSchemas map[string]map[string]*sqlmanager_shared.ColumnInfo, - mappings []*mgmtv1alpha1.JobMapping, -) bool { - tableColMappings := getUniqueColMappingsMap(mappings) - - for key := range groupedSchemas { - // For this method, we only care about the schemas+tables that we currently have mappings for - if _, ok := tableColMappings[key]; !ok { - delete(groupedSchemas, key) - } - } - - if len(tableColMappings) != len(groupedSchemas) { - return false - } - - // tests to make sure that every column in the col mappings is present in the db schema - for table, cols := range tableColMappings { - schemaCols, ok := groupedSchemas[table] - if !ok { - return false - } - // job mappings has more columns than the schema - if len(cols) > len(schemaCols) { - return false - } - for col := range cols { - if _, ok := schemaCols[col]; !ok { - return false - } - } - } - return true -} - -func getUniqueColMappingsMap( - mappings []*mgmtv1alpha1.JobMapping, -) map[string]map[string]struct{} { - tableColMappings := map[string]map[string]struct{}{} - for _, mapping := range mappings { - key := neosync_benthos.BuildBenthosTable(mapping.Schema, mapping.Table) - if _, ok := tableColMappings[key]; ok { - tableColMappings[key][mapping.Column] = struct{}{} - } else { - tableColMappings[key] = map[string]struct{}{ - mapping.Column: {}, - } - } - } - return tableColMappings -} - -func shouldHaltOnSchemaAddition( - groupedSchemas map[string]map[string]*sqlmanager_shared.ColumnInfo, - mappings []*mgmtv1alpha1.JobMapping, -) bool { - tableColMappings := getUniqueColMappingsMap(mappings) - - if len(tableColMappings) != len(groupedSchemas) { - return true - } - - for table, cols := range groupedSchemas { - mappingCols, ok := tableColMappings[table] - if !ok { - return true - } - if len(cols) > len(mappingCols) { - return true - } - for col := range cols { - if _, ok := mappingCols[col]; !ok { - return true - } - } - } - return false -} - -type sqlSourceTableOptions struct { - WhereClause *string -} - -func buildTableSubsetMap(tableOpts map[string]*sqlSourceTableOptions, tableMap map[string]*tableMapping) map[string]string { - tableSubsetMap := map[string]string{} - for table, opts := range tableOpts { - if _, ok := tableMap[table]; !ok { - continue - } - if opts != nil && opts.WhereClause 
!= nil && *opts.WhereClause != "" { - tableSubsetMap[table] = *opts.WhereClause - } - } - return tableSubsetMap -} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/utils.go b/worker/pkg/workflows/datasync/activities/gen-benthos-configs/utils.go deleted file mode 100644 index e0e7bc6e11..0000000000 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/utils.go +++ /dev/null @@ -1,57 +0,0 @@ -package genbenthosconfigs_activity - -import ( - "fmt" - "strings" - - mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" -) - -func getMapValuesCount[K comparable, V any](m map[K][]V) int { - count := 0 - for _, v := range m { - count += len(v) - } - return count -} - -func buildPlainInsertArgs(cols []string) string { - if len(cols) == 0 { - return "" - } - pieces := make([]string, len(cols)) - for idx := range cols { - pieces[idx] = fmt.Sprintf("this.%q", cols[idx]) - } - return fmt.Sprintf("root = [%s]", strings.Join(pieces, ", ")) -} - -func buildPlainColumns(mappings []*mgmtv1alpha1.JobMapping) []string { - columns := make([]string, len(mappings)) - for idx := range mappings { - columns[idx] = mappings[idx].Column - } - return columns -} - -func shouldProcessColumn(t *mgmtv1alpha1.JobMappingTransformer) bool { - switch t.GetConfig().GetConfig().(type) { - case *mgmtv1alpha1.TransformerConfig_PassthroughConfig, - nil: - return false - default: - return true - } -} - -func shouldProcessStrict(t *mgmtv1alpha1.JobMappingTransformer) bool { - switch t.GetConfig().GetConfig().(type) { - case *mgmtv1alpha1.TransformerConfig_PassthroughConfig, - *mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig, - *mgmtv1alpha1.TransformerConfig_Nullconfig, - nil: - return false - default: - return true - } -} diff --git a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/utils_test.go b/worker/pkg/workflows/datasync/activities/gen-benthos-configs/utils_test.go deleted file mode 100644 index 3eff692457..0000000000 --- a/worker/pkg/workflows/datasync/activities/gen-benthos-configs/utils_test.go +++ /dev/null @@ -1,70 +0,0 @@ -package genbenthosconfigs_activity - -import ( - "testing" - - mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1" - "github.com/stretchr/testify/require" -) - -func Test_shouldProcessColumn(t *testing.T) { - t.Run("no - passthrough", func(t *testing.T) { - actual := shouldProcessColumn(&mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, - }, - }) - require.False(t, actual) - }) - t.Run("no - nil", func(t *testing.T) { - actual := shouldProcessColumn(nil) - require.False(t, actual) - }) - t.Run("yes", func(t *testing.T) { - actual := shouldProcessColumn(&mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateBoolConfig{}, - }, - }) - require.True(t, actual) - }) -} - -func Test_shouldProcessStrict(t *testing.T) { - t.Run("no - passthrough", func(t *testing.T) { - actual := shouldProcessStrict(&mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_PassthroughConfig{}, - }, - }) - require.False(t, actual) - }) - t.Run("no - default", func(t *testing.T) { - actual := shouldProcessStrict(&mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateDefaultConfig{}, - }, - }) - require.False(t, 
actual) - }) - t.Run("no - null", func(t *testing.T) { - actual := shouldProcessStrict(&mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_Nullconfig{}, - }, - }) - require.False(t, actual) - }) - t.Run("no - nil", func(t *testing.T) { - actual := shouldProcessStrict(nil) - require.False(t, actual) - }) - t.Run("yes", func(t *testing.T) { - actual := shouldProcessStrict(&mgmtv1alpha1.JobMappingTransformer{ - Config: &mgmtv1alpha1.TransformerConfig{ - Config: &mgmtv1alpha1.TransformerConfig_GenerateBoolConfig{}, - }, - }) - require.True(t, actual) - }) -} diff --git a/worker/pkg/workflows/datasync/activities/sync/activity.go b/worker/pkg/workflows/datasync/activities/sync/activity.go index e13d1f5542..8b8bfc6a11 100644 --- a/worker/pkg/workflows/datasync/activities/sync/activity.go +++ b/worker/pkg/workflows/datasync/activities/sync/activity.go @@ -21,6 +21,7 @@ import ( neosynclogger "github.com/nucleuscloud/neosync/backend/pkg/logger" "github.com/nucleuscloud/neosync/backend/pkg/metrics" "github.com/nucleuscloud/neosync/backend/pkg/sqlconnect" + benthosbuilder_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" connectiontunnelmanager "github.com/nucleuscloud/neosync/internal/connection-tunnel-manager" pool_mongo_provider "github.com/nucleuscloud/neosync/internal/connection-tunnel-manager/pool/providers/mongo" pool_sql_provider "github.com/nucleuscloud/neosync/internal/connection-tunnel-manager/pool/providers/sql" @@ -55,7 +56,7 @@ type SyncMetadata struct { type SyncRequest struct { // Deprecated BenthosConfig string - BenthosDsns []*shared.BenthosDsn + BenthosDsns []*benthosbuilder_shared.BenthosDsn // Identifier that is used in combination with the AccountId to retrieve the benthos config Name string AccountId string @@ -340,7 +341,7 @@ func (a *Activity) Sync(ctx context.Context, req *SyncRequest, metadata *SyncMet func getConnectionsFromBenthosDsns( ctx context.Context, connclient mgmtv1alpha1connect.ConnectionServiceClient, - dsns []*shared.BenthosDsn, + dsns []*benthosbuilder_shared.BenthosDsn, ) ([]*mgmtv1alpha1.Connection, error) { connections := make([]*mgmtv1alpha1.Connection, len(dsns)) diff --git a/worker/pkg/workflows/datasync/workflow/workflow.go b/worker/pkg/workflows/datasync/workflow/workflow.go index b56cdb924b..abec60ed3a 100644 --- a/worker/pkg/workflows/datasync/workflow/workflow.go +++ b/worker/pkg/workflows/datasync/workflow/workflow.go @@ -8,6 +8,8 @@ import ( "sync" "time" + benthosbuilder "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder" + benthosbuilder_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared" neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" accountstatus_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/account-status" genbenthosconfigs_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/gen-benthos-configs" @@ -71,7 +73,7 @@ func Workflow(wfctx workflow.Context, req *WorkflowRequest) (*WorkflowResponse, ) if actOptResp.RequestedRecordCount != nil && *actOptResp.RequestedRecordCount > 0 { - logger.Info("requested record count of %d", *actOptResp.RequestedRecordCount) + logger.Info(fmt.Sprintf("requested record count of %d", *actOptResp.RequestedRecordCount)) } var initialCheckAccountStatusResponse *accountstatus_activity.CheckAccountStatusResponse var a *accountstatus_activity.Activity @@ -130,7 +132,7 @@ func Workflow(wfctx 
diff --git a/worker/pkg/workflows/datasync/workflow/workflow.go b/worker/pkg/workflows/datasync/workflow/workflow.go
index b56cdb924b..abec60ed3a 100644
--- a/worker/pkg/workflows/datasync/workflow/workflow.go
+++ b/worker/pkg/workflows/datasync/workflow/workflow.go
@@ -8,6 +8,8 @@ import (
 	"sync"
 	"time"
+	benthosbuilder "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder"
+	benthosbuilder_shared "github.com/nucleuscloud/neosync/internal/benthos/benthos-builder/shared"
 	neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos"
 	accountstatus_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/account-status"
 	genbenthosconfigs_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/gen-benthos-configs"
@@ -71,7 +73,7 @@ func Workflow(wfctx workflow.Context, req *WorkflowRequest) (*WorkflowResponse,
 	)
 	if actOptResp.RequestedRecordCount != nil && *actOptResp.RequestedRecordCount > 0 {
-		logger.Info("requested record count of %d", *actOptResp.RequestedRecordCount)
+		logger.Info(fmt.Sprintf("requested record count of %d", *actOptResp.RequestedRecordCount))
 	var initialCheckAccountStatusResponse *accountstatus_activity.CheckAccountStatusResponse
 	var a *accountstatus_activity.Activity
@@ -130,7 +132,7 @@ func Workflow(wfctx workflow.Context, req *WorkflowRequest) (*WorkflowResponse,
 	logger.Info("completed RunSqlInitTableStatements.")
 	redisDependsOn := map[string]map[string][]string{} // schema.table -> dependson
-	redisConfigs := map[string]*genbenthosconfigs_activity.BenthosRedisConfig{}
+	redisConfigs := map[string]*benthosbuilder_shared.BenthosRedisConfig{}
 	for _, cfg := range bcResp.BenthosConfigs {
 		for _, redisCfg := range cfg.RedisConfig {
 			redisConfigs[redisCfg.Key] = redisCfg
@@ -212,7 +214,7 @@ func Workflow(wfctx workflow.Context, req *WorkflowRequest) (*WorkflowResponse,
 	started := sync.Map{}
 	completed := sync.Map{}
-	executeSyncActivity := func(bc *genbenthosconfigs_activity.BenthosConfigResponse, logger log.Logger) {
+	executeSyncActivity := func(bc *benthosbuilder.BenthosConfigResponse, logger log.Logger) {
 		future := invokeSync(bc, ctx, &started, &completed, logger, &bcResp.AccountId, actOptResp.SyncActivityOptions)
 		workselector.AddFuture(future, func(f workflow.Future) {
 			var result sync_activity.SyncResponse
@@ -360,7 +362,7 @@ func runRedisCleanUpActivity(
 	logger log.Logger,
 	dependsOnMap map[string]map[string][]string,
 	jobId string,
-	redisConfigs map[string]*genbenthosconfigs_activity.BenthosRedisConfig,
+	redisConfigs map[string]*benthosbuilder_shared.BenthosRedisConfig,
) error {
 	if len(redisConfigs) > 0 {
 		for k, cfg := range redisConfigs {
@@ -402,7 +404,7 @@ func isReadyForCleanUp(table, col string, dependsOnMap map[string]map[string][]s
 	return true
 }
-func withBenthosConfigResponseLoggerTags(bc *genbenthosconfigs_activity.BenthosConfigResponse) []any {
+func withBenthosConfigResponseLoggerTags(bc *benthosbuilder.BenthosConfigResponse) []any {
 	keyvals := []any{}
 	if bc.Name != "" {
@@ -414,19 +416,16 @@ func withBenthosConfigResponseLoggerTags(bc *genbenthosconfigs_activity.BenthosC
 	if bc.TableName != "" {
 		keyvals = append(keyvals, "table", bc.TableName)
 	}
-	if bc.SourceConnectionType != "" {
-		keyvals = append(keyvals, "sourceConnectionType", bc.SourceConnectionType)
-	}
 	return keyvals
 }
-func getSyncMetadata(config *genbenthosconfigs_activity.BenthosConfigResponse) *sync_activity.SyncMetadata {
+func getSyncMetadata(config *benthosbuilder.BenthosConfigResponse) *sync_activity.SyncMetadata {
 	return &sync_activity.SyncMetadata{Schema: config.TableSchema, Table: config.TableName}
 }
 func invokeSync(
-	config *genbenthosconfigs_activity.BenthosConfigResponse,
+	config *benthosbuilder.BenthosConfigResponse,
 	ctx workflow.Context,
 	started, completed *sync.Map,
 	logger log.Logger,
@@ -487,7 +486,7 @@ func updateCompletedMap(tableName string, completed *sync.Map, columns []string)
 	return nil
 }
-func isConfigReady(config *genbenthosconfigs_activity.BenthosConfigResponse, completed *sync.Map) (bool, error) {
+func isConfigReady(config *benthosbuilder.BenthosConfigResponse, completed *sync.Map) (bool, error) {
 	if config == nil {
 		return false, nil
 	}
@@ -516,14 +515,14 @@ func isConfigReady(config *genbenthosconfigs_activity.BenthosConfigResponse, com
 }
 type SplitConfigs struct {
-	Root []*genbenthosconfigs_activity.BenthosConfigResponse
-	Dependents []*genbenthosconfigs_activity.BenthosConfigResponse
+	Root []*benthosbuilder.BenthosConfigResponse
+	Dependents []*benthosbuilder.BenthosConfigResponse
 }
-func splitBenthosConfigs(configs []*genbenthosconfigs_activity.BenthosConfigResponse) *SplitConfigs {
+func splitBenthosConfigs(configs []*benthosbuilder.BenthosConfigResponse) *SplitConfigs {
 	out := &SplitConfigs{
-		Root: []*genbenthosconfigs_activity.BenthosConfigResponse{},
-		Dependents:
[]*genbenthosconfigs_activity.BenthosConfigResponse{}, + Root: []*benthosbuilder.BenthosConfigResponse{}, + Dependents: []*benthosbuilder.BenthosConfigResponse{}, } for _, cfg := range configs { if len(cfg.DependsOn) == 0 { diff --git a/worker/pkg/workflows/datasync/workflow/workflow_test.go b/worker/pkg/workflows/datasync/workflow/workflow_test.go index fbadee98d0..79cd4541c5 100644 --- a/worker/pkg/workflows/datasync/workflow/workflow_test.go +++ b/worker/pkg/workflows/datasync/workflow/workflow_test.go @@ -1,925 +1,925 @@ package datasync_workflow -import ( - "context" - "errors" - "sync" - "testing" - "time" - - "github.com/google/uuid" - tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" - neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" - accountstatus_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/account-status" - genbenthosconfigs_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/gen-benthos-configs" - runsqlinittablestmts_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/run-sql-init-table-stmts" - "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" - sync_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/sync" - syncactivityopts_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/sync-activity-opts" - syncrediscleanup_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/sync-redis-clean-up" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - "go.temporal.io/sdk/temporal" - "go.temporal.io/sdk/testsuite" - "go.temporal.io/sdk/workflow" - "go.uber.org/atomic" -) - -func Test_Workflow_BenthosConfigsFails(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := testSuite.NewTestWorkflowEnvironment() - - var activityOpts *syncactivityopts_activity.Activity - env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). - Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - SyncActivityOptions: &workflow.ActivityOptions{ - StartToCloseTimeout: time.Minute, - }, - AccountId: uuid.NewString(), - }, nil) - var accStatsActivity *accountstatus_activity.Activity - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) - - var genact *genbenthosconfigs_activity.Activity - env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything).Return(nil, errors.New("TestFailure")) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - assert.True(t, env.IsWorkflowCompleted()) - assert.True(t, env.IsWorkflowCompleted()) - - err := env.GetWorkflowError() - assert.Error(t, err) - var applicationErr *temporal.ApplicationError - assert.True(t, errors.As(err, &applicationErr)) - assert.Equal(t, "TestFailure", applicationErr.Error()) - - env.AssertExpectations(t) -} - -func Test_Workflow_Succeeds_Zero_BenthosConfigs(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := testSuite.NewTestWorkflowEnvironment() - - var activityOpts *syncactivityopts_activity.Activity - env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). 
- Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - SyncActivityOptions: &workflow.ActivityOptions{ - StartToCloseTimeout: time.Minute, - }, - AccountId: uuid.NewString(), - }, nil) - var accStatsActivity *accountstatus_activity.Activity - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) - - var genact *genbenthosconfigs_activity.Activity - env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). - Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{}}, nil) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - assert.True(t, env.IsWorkflowCompleted()) - - err := env.GetWorkflowError() - assert.Nil(t, err) - - result := &WorkflowResponse{} - err = env.GetWorkflowResult(result) - assert.Nil(t, err) - assert.Equal(t, result, &WorkflowResponse{}) - - env.AssertExpectations(t) -} - -func Test_Workflow_Succeeds_SingleSync(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := testSuite.NewTestWorkflowEnvironment() - - var activityOpts *syncactivityopts_activity.Activity - env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). - Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - SyncActivityOptions: &workflow.ActivityOptions{ - StartToCloseTimeout: time.Minute, - }, - AccountId: uuid.NewString(), - }, nil) - var accStatsActivity *accountstatus_activity.Activity - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) - - var genact *genbenthosconfigs_activity.Activity - env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). - Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ - { - Name: "public.users", - DependsOn: []*tabledependency.DependsOn{}, - Config: &neosync_benthos.BenthosConfig{}, - }, - }}, nil) - var sqlInitActivity *runsqlinittablestmts_activity.Activity - env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). - Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) - syncActivity := sync_activity.Activity{} - env.OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, mock.Anything).Return(&sync_activity.SyncResponse{}, nil) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - assert.True(t, env.IsWorkflowCompleted()) - - err := env.GetWorkflowError() - assert.Nil(t, err) - - result := &WorkflowResponse{} - err = env.GetWorkflowResult(result) - assert.Nil(t, err) - assert.Equal(t, result, &WorkflowResponse{}) - - env.AssertExpectations(t) -} - -func Test_Workflow_Follows_Synchronous_DependentFlow(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := testSuite.NewTestWorkflowEnvironment() - - var accStatsActivity *accountstatus_activity.Activity - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) - - var genact *genbenthosconfigs_activity.Activity - env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). 
- Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ - { - Name: "public.users", - DependsOn: []*tabledependency.DependsOn{}, - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - TableSchema: "public", - TableName: "users", - Columns: []string{"id"}, - }, - { - Name: "public.foo", - DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}}, - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - TableSchema: "public", - TableName: "foo", - Columns: []string{"id"}, - }, - }}, nil) - var activityOpts *syncactivityopts_activity.Activity - env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). - Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - SyncActivityOptions: &workflow.ActivityOptions{ - StartToCloseTimeout: time.Minute, - }, - }, nil) - var sqlInitActivity *runsqlinittablestmts_activity.Activity - env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). - Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) - count := 0 - syncActivity := sync_activity.Activity{} - env. - OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - assert.Equal(t, count, 0) - count += 1 - return &sync_activity.SyncResponse{}, nil - }) - env. - OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "foo"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - assert.Equal(t, count, 1) - count += 1 - return &sync_activity.SyncResponse{}, nil - }) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - assert.True(t, env.IsWorkflowCompleted()) - assert.Equal(t, count, 2) - - err := env.GetWorkflowError() - assert.Nil(t, err) - - result := &WorkflowResponse{} - err = env.GetWorkflowResult(result) - assert.Nil(t, err) - assert.Equal(t, result, &WorkflowResponse{}) - - env.AssertExpectations(t) -} - -func Test_Workflow_Follows_Multiple_Dependents(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := testSuite.NewTestWorkflowEnvironment() - - var accStatsActivity *accountstatus_activity.Activity - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) - var genact *genbenthosconfigs_activity.Activity - env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). 
- Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ - { - Name: "public.users", - DependsOn: []*tabledependency.DependsOn{}, - TableSchema: "public", - TableName: "users", - Columns: []string{"id"}, - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - { - Name: "public.accounts", - DependsOn: []*tabledependency.DependsOn{}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "accounts", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - { - Name: "public.foo", - DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "foo", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - }}, nil) - var activityOpts *syncactivityopts_activity.Activity - env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). - Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - SyncActivityOptions: &workflow.ActivityOptions{ - StartToCloseTimeout: time.Minute, - }, - }, nil) - var sqlInitActivity *runsqlinittablestmts_activity.Activity - env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). - Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) - counter := atomic.NewInt32(0) - syncActivity := sync_activity.Activity{} - env. - OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - counter.Add(1) - return &sync_activity.SyncResponse{}, nil - }) - env. - OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - counter.Add(1) - return &sync_activity.SyncResponse{}, nil - }) - env. - OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "foo"}). 
- Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - assert.Equal(t, counter.Load(), int32(2)) - counter.Add(1) - return &sync_activity.SyncResponse{}, nil - }) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - assert.True(t, env.IsWorkflowCompleted()) - assert.Equal(t, counter.Load(), int32(3)) - - err := env.GetWorkflowError() - assert.Nil(t, err) - - result := &WorkflowResponse{} - err = env.GetWorkflowResult(result) - assert.Nil(t, err) - assert.Equal(t, result, &WorkflowResponse{}) - - env.AssertExpectations(t) -} - -func Test_Workflow_Follows_Multiple_Dependent_Redis_Cleanup(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := testSuite.NewTestWorkflowEnvironment() - - var accStatsActivity *accountstatus_activity.Activity - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) - var genact *genbenthosconfigs_activity.Activity - env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). - Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ - { - Name: "public.users", - DependsOn: []*tabledependency.DependsOn{}, - TableSchema: "public", - TableName: "users", - Columns: []string{"id"}, - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - RedisConfig: []*genbenthosconfigs_activity.BenthosRedisConfig{ - { - Key: "fake-redis-key", - Table: "public.users", - Column: "id", - }, - }, - }, - { - Name: "public.accounts", - DependsOn: []*tabledependency.DependsOn{}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "accounts", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - RedisConfig: []*genbenthosconfigs_activity.BenthosRedisConfig{ - { - Key: "fake-redis-key2", - Table: "public.accounts", - Column: "id", - }, - }, - }, - { - Name: "public.foo", - DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "foo", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - }}, nil) - var activityOpts *syncactivityopts_activity.Activity - env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). - Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - SyncActivityOptions: &workflow.ActivityOptions{ - StartToCloseTimeout: time.Minute, - }, - }, nil) - var sqlInitActivity *runsqlinittablestmts_activity.Activity - env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). - Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) - counter := atomic.NewInt32(0) - syncActivities := &sync_activity.Activity{} - env. 
- OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - counter.Add(1) - return &sync_activity.SyncResponse{}, nil - }) - env. - OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - counter.Add(1) - return &sync_activity.SyncResponse{}, nil - }) - env. - OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "foo"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - assert.Equal(t, counter.Load(), int32(2)) - counter.Add(1) - return &sync_activity.SyncResponse{}, nil - }) - - env.OnActivity(syncrediscleanup_activity.DeleteRedisHash, mock.Anything, mock.Anything). - Return(&syncrediscleanup_activity.DeleteRedisHashResponse{}, nil) - env.OnActivity(syncrediscleanup_activity.DeleteRedisHash, mock.Anything, mock.Anything). - Return(&syncrediscleanup_activity.DeleteRedisHashResponse{}, nil) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - assert.True(t, env.IsWorkflowCompleted()) - assert.Equal(t, counter.Load(), int32(3)) - - err := env.GetWorkflowError() - assert.Nil(t, err) - - result := &WorkflowResponse{} - err = env.GetWorkflowResult(result) - assert.Nil(t, err) - assert.Equal(t, result, &WorkflowResponse{}) - - env.AssertExpectations(t) -} - -func Test_Workflow_Halts_Activities_OnError(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := testSuite.NewTestWorkflowEnvironment() - - var genact *genbenthosconfigs_activity.Activity - env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). 
- Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ - { - Name: "public.users", - DependsOn: []*tabledependency.DependsOn{}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "users", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - { - Name: "public.accounts", - DependsOn: []*tabledependency.DependsOn{}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "accounts", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - { - Name: "public.foo", - DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "foo", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - }}, nil) - var sqlInitActivity *runsqlinittablestmts_activity.Activity - env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). - Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) - var activityOpts *syncactivityopts_activity.Activity - env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). - Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - SyncActivityOptions: &workflow.ActivityOptions{ - StartToCloseTimeout: time.Minute, - }, - }, nil) - var accStatsActivity *accountstatus_activity.Activity - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) - - syncActivity := sync_activity.Activity{} - env. - OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - return &sync_activity.SyncResponse{}, nil - }) - env. - OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}). - Return(nil, errors.New("TestFailure")) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - require.True(t, env.IsWorkflowCompleted()) - - err := env.GetWorkflowError() - require.Error(t, err) - var applicationErr *temporal.ApplicationError - require.True(t, errors.As(err, &applicationErr)) - require.Equal(t, "TestFailure", applicationErr.Error()) - - env.AssertExpectations(t) -} - -func Test_Workflow_Halts_Activities_On_InvalidAccountStatus(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := testSuite.NewTestWorkflowEnvironment() - - var genact *genbenthosconfigs_activity.Activity - env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). 
- Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ - { - Name: "public.users", - DependsOn: []*tabledependency.DependsOn{}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "users", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - { - Name: "public.accounts", - DependsOn: []*tabledependency.DependsOn{}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "accounts", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - { - Name: "public.foo", - DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "foo", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - }}, nil) - var sqlInitActivity *runsqlinittablestmts_activity.Activity - env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). - Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) - var activityOpts *syncactivityopts_activity.Activity - env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). - Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - SyncActivityOptions: &workflow.ActivityOptions{ - StartToCloseTimeout: time.Minute, - }, - }, nil) - - var accStatsActivity *accountstatus_activity.Activity - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true, ShouldPoll: true}, nil).Once() - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: false}, nil).Once() - - syncActivity := sync_activity.Activity{} - env. - OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - return &sync_activity.SyncResponse{}, nil - }) - env. - OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}). 
- Return(nil, errors.New("AccountTestFailure")) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - require.True(t, env.IsWorkflowCompleted()) - - err := env.GetWorkflowError() - require.Error(t, err) - var applicationErr *temporal.ApplicationError - require.True(t, errors.As(err, &applicationErr)) - require.ErrorContains(t, applicationErr, invalidAccountStatusError.Error()) - - env.AssertExpectations(t) -} - -func Test_Workflow_Cleans_Up_Redis_OnError(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := testSuite.NewTestWorkflowEnvironment() - - var genact *genbenthosconfigs_activity.Activity - env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). - Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ - { - Name: "public.users", - DependsOn: []*tabledependency.DependsOn{}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "users", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - RedisConfig: []*genbenthosconfigs_activity.BenthosRedisConfig{ - { - Key: "fake-redis-key", - Table: "public.users", - Column: "id", - }, - }, - }, - { - Name: "public.accounts", - DependsOn: []*tabledependency.DependsOn{}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "accounts", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - { - Name: "public.foo", - DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}}, - Columns: []string{"id"}, - TableSchema: "public", - TableName: "foo", - Config: &neosync_benthos.BenthosConfig{ - StreamConfig: neosync_benthos.StreamConfig{ - Input: &neosync_benthos.InputConfig{ - Inputs: neosync_benthos.Inputs{ - SqlSelect: &neosync_benthos.SqlSelect{ - Columns: []string{"id"}, - }, - }, - }, - }, - }, - }, - }}, nil) - var sqlInitActivity *runsqlinittablestmts_activity.Activity - env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). - Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) - var activityOpts *syncactivityopts_activity.Activity - env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). - Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - SyncActivityOptions: &workflow.ActivityOptions{ - StartToCloseTimeout: time.Minute, - }, - }, nil) - var accStatsActivity *accountstatus_activity.Activity - env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) - - syncActivities := &sync_activity.Activity{} - env. - OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}). - Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { - return &sync_activity.SyncResponse{}, nil - }) - env. 
- OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}, mock.Anything). - Return(nil, errors.New("TestFailure")) - - env.OnActivity(syncrediscleanup_activity.DeleteRedisHash, mock.Anything, mock.Anything). - Return(&syncrediscleanup_activity.DeleteRedisHashResponse{}, nil) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - assert.True(t, env.IsWorkflowCompleted()) - - err := env.GetWorkflowError() - assert.Error(t, err) - var applicationErr *temporal.ApplicationError - assert.True(t, errors.As(err, &applicationErr)) - assert.Equal(t, "TestFailure", applicationErr.Error()) - - env.AssertExpectations(t) -} -func Test_isConfigReady(t *testing.T) { - isReady, err := isConfigReady(nil, nil) - assert.NoError(t, err) - assert.False(t, isReady, "config is nil") - - isReady, err = isConfigReady(&genbenthosconfigs_activity.BenthosConfigResponse{ - Name: "foo", - DependsOn: []*tabledependency.DependsOn{}, - }, - nil) - assert.NoError(t, err) - assert.True( - t, - isReady, - "has no dependencies", - ) - - completed := sync.Map{} - completed.Store("bar", []string{"id"}) - isReady, err = isConfigReady(&genbenthosconfigs_activity.BenthosConfigResponse{ - Name: "foo", - DependsOn: []*tabledependency.DependsOn{{Table: "bar", Columns: []string{"id"}}, {Table: "baz", Columns: []string{"id"}}}, - }, - &completed) - assert.NoError(t, err) - assert.False( - t, - isReady, - "not all dependencies are finished", - ) - - completed = sync.Map{} - completed.Store("bar", []string{"id"}) - completed.Store("baz", []string{"id"}) - isReady, err = isConfigReady(&genbenthosconfigs_activity.BenthosConfigResponse{ - Name: "foo", - DependsOn: []*tabledependency.DependsOn{{Table: "bar", Columns: []string{"id"}}, {Table: "baz", Columns: []string{"id"}}}, - }, &completed) - assert.NoError(t, err) - assert.True( - t, - isReady, - "all dependencies are finished", - ) - - completed = sync.Map{} - completed.Store("bar", []string{"id"}) - isReady, err = isConfigReady(&genbenthosconfigs_activity.BenthosConfigResponse{ - Name: "foo", - DependsOn: []*tabledependency.DependsOn{{Table: "bar", Columns: []string{"id", "f_id"}}}, - }, - &completed) - assert.NoError(t, err) - assert.False( - t, - isReady, - "not all dependencies columns are finished", - ) -} - -func Test_updateCompletedMap(t *testing.T) { - completedMap := sync.Map{} - table := "public.users" - cols := []string{"id"} - err := updateCompletedMap(table, &completedMap, cols) - assert.NoError(t, err) - val, loaded := completedMap.Load(table) - assert.True(t, loaded) - assert.Equal(t, cols, val) - - completedMap = sync.Map{} - table = "public.users" - completedMap.Store(table, []string{"name"}) - err = updateCompletedMap(table, &completedMap, []string{"id"}) - assert.NoError(t, err) - val, loaded = completedMap.Load(table) - assert.True(t, loaded) - assert.Equal(t, []string{"name", "id"}, val) -} - -func Test_isReadyForCleanUp(t *testing.T) { - assert.True(t, isReadyForCleanUp("", "", nil), "no dependencies") - - assert.False( - t, - isReadyForCleanUp( - "table", - "col", - map[string]map[string][]string{ - "other_table": {"table": []string{"col"}}, - }, - ), - "has dependency", - ) - - assert.True( - t, - isReadyForCleanUp( - "table", - "col", - map[string]map[string][]string{ - "other_table": {"table": []string{"col1"}}, - }, - ), - "no dependency", - ) -} - -func Test_Workflow_Initial_AccountStatus(t *testing.T) { - testSuite := &testsuite.WorkflowTestSuite{} - env := 
testSuite.NewTestWorkflowEnvironment() - - var activityOptsActivity *syncactivityopts_activity.Activity - env.OnActivity(activityOptsActivity.RetrieveActivityOptions, mock.Anything, mock.Anything). - Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ - AccountId: uuid.NewString(), - RequestedRecordCount: shared.Ptr(uint64(4)), - }, nil) - - var checkStatusActivity *accountstatus_activity.Activity - env.OnActivity(checkStatusActivity.CheckAccountStatus, mock.Anything, mock.Anything). - Return(&accountstatus_activity.CheckAccountStatusResponse{ - IsValid: false, - Reason: shared.Ptr("test failure"), - }, nil) - - env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) - - assert.True(t, env.IsWorkflowCompleted()) - - err := env.GetWorkflowError() - assert.Error(t, err) - var applicationErr *temporal.ApplicationError - assert.True(t, errors.As(err, &applicationErr)) - assert.ErrorContains(t, applicationErr, invalidAccountStatusError.Error()) - - env.AssertExpectations(t) -} +// import ( +// "context" +// "errors" +// "sync" +// "testing" +// "time" + +// "github.com/google/uuid" +// tabledependency "github.com/nucleuscloud/neosync/backend/pkg/table-dependency" +// neosync_benthos "github.com/nucleuscloud/neosync/worker/pkg/benthos" +// accountstatus_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/account-status" +// genbenthosconfigs_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/gen-benthos-configs" +// runsqlinittablestmts_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/run-sql-init-table-stmts" +// "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/shared" +// sync_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/sync" +// syncactivityopts_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/sync-activity-opts" +// syncrediscleanup_activity "github.com/nucleuscloud/neosync/worker/pkg/workflows/datasync/activities/sync-redis-clean-up" +// "github.com/stretchr/testify/assert" +// "github.com/stretchr/testify/mock" +// "github.com/stretchr/testify/require" +// "go.temporal.io/sdk/temporal" +// "go.temporal.io/sdk/testsuite" +// "go.temporal.io/sdk/workflow" +// "go.uber.org/atomic" +// ) + +// func Test_Workflow_BenthosConfigsFails(t *testing.T) { +// testSuite := &testsuite.WorkflowTestSuite{} +// env := testSuite.NewTestWorkflowEnvironment() + +// var activityOpts *syncactivityopts_activity.Activity +// env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). +// Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ +// SyncActivityOptions: &workflow.ActivityOptions{ +// StartToCloseTimeout: time.Minute, +// }, +// AccountId: uuid.NewString(), +// }, nil) +// var accStatsActivity *accountstatus_activity.Activity +// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). 
+// Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) + +// var genact *genbenthosconfigs_activity.Activity +// env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything).Return(nil, errors.New("TestFailure")) + +// env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) + +// assert.True(t, env.IsWorkflowCompleted()) +// assert.True(t, env.IsWorkflowCompleted()) + +// err := env.GetWorkflowError() +// assert.Error(t, err) +// var applicationErr *temporal.ApplicationError +// assert.True(t, errors.As(err, &applicationErr)) +// assert.Equal(t, "TestFailure", applicationErr.Error()) + +// env.AssertExpectations(t) +// } + +// func Test_Workflow_Succeeds_Zero_BenthosConfigs(t *testing.T) { +// testSuite := &testsuite.WorkflowTestSuite{} +// env := testSuite.NewTestWorkflowEnvironment() + +// var activityOpts *syncactivityopts_activity.Activity +// env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). +// Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ +// SyncActivityOptions: &workflow.ActivityOptions{ +// StartToCloseTimeout: time.Minute, +// }, +// AccountId: uuid.NewString(), +// }, nil) +// var accStatsActivity *accountstatus_activity.Activity +// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). +// Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) + +// var genact *genbenthosconfigs_activity.Activity +// env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). +// Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{}}, nil) + +// env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) + +// assert.True(t, env.IsWorkflowCompleted()) + +// err := env.GetWorkflowError() +// assert.Nil(t, err) + +// result := &WorkflowResponse{} +// err = env.GetWorkflowResult(result) +// assert.Nil(t, err) +// assert.Equal(t, result, &WorkflowResponse{}) + +// env.AssertExpectations(t) +// } + +// func Test_Workflow_Succeeds_SingleSync(t *testing.T) { +// testSuite := &testsuite.WorkflowTestSuite{} +// env := testSuite.NewTestWorkflowEnvironment() + +// var activityOpts *syncactivityopts_activity.Activity +// env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). +// Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ +// SyncActivityOptions: &workflow.ActivityOptions{ +// StartToCloseTimeout: time.Minute, +// }, +// AccountId: uuid.NewString(), +// }, nil) +// var accStatsActivity *accountstatus_activity.Activity +// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). +// Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) + +// var genact *genbenthosconfigs_activity.Activity +// env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). +// Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ +// { +// Name: "public.users", +// DependsOn: []*tabledependency.DependsOn{}, +// Config: &neosync_benthos.BenthosConfig{}, +// }, +// }}, nil) +// var sqlInitActivity *runsqlinittablestmts_activity.Activity +// env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). 
+// Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) +// syncActivity := sync_activity.Activity{} +// env.OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, mock.Anything).Return(&sync_activity.SyncResponse{}, nil) + +// env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) + +// assert.True(t, env.IsWorkflowCompleted()) + +// err := env.GetWorkflowError() +// assert.Nil(t, err) + +// result := &WorkflowResponse{} +// err = env.GetWorkflowResult(result) +// assert.Nil(t, err) +// assert.Equal(t, result, &WorkflowResponse{}) + +// env.AssertExpectations(t) +// } + +// func Test_Workflow_Follows_Synchronous_DependentFlow(t *testing.T) { +// testSuite := &testsuite.WorkflowTestSuite{} +// env := testSuite.NewTestWorkflowEnvironment() + +// var accStatsActivity *accountstatus_activity.Activity +// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). +// Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) + +// var genact *genbenthosconfigs_activity.Activity +// env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). +// Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ +// { +// Name: "public.users", +// DependsOn: []*tabledependency.DependsOn{}, +// Config: &neosync_benthos.BenthosConfig{ +// StreamConfig: neosync_benthos.StreamConfig{ +// Input: &neosync_benthos.InputConfig{ +// Inputs: neosync_benthos.Inputs{ +// SqlSelect: &neosync_benthos.SqlSelect{ +// Columns: []string{"id"}, +// }, +// }, +// }, +// }, +// }, +// TableSchema: "public", +// TableName: "users", +// Columns: []string{"id"}, +// }, +// { +// Name: "public.foo", +// DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}}, +// Config: &neosync_benthos.BenthosConfig{ +// StreamConfig: neosync_benthos.StreamConfig{ +// Input: &neosync_benthos.InputConfig{ +// Inputs: neosync_benthos.Inputs{ +// SqlSelect: &neosync_benthos.SqlSelect{ +// Columns: []string{"id"}, +// }, +// }, +// }, +// }, +// }, +// TableSchema: "public", +// TableName: "foo", +// Columns: []string{"id"}, +// }, +// }}, nil) +// var activityOpts *syncactivityopts_activity.Activity +// env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). +// Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ +// SyncActivityOptions: &workflow.ActivityOptions{ +// StartToCloseTimeout: time.Minute, +// }, +// }, nil) +// var sqlInitActivity *runsqlinittablestmts_activity.Activity +// env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). +// Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) +// count := 0 +// syncActivity := sync_activity.Activity{} +// env. +// OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}). +// Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { +// assert.Equal(t, count, 0) +// count += 1 +// return &sync_activity.SyncResponse{}, nil +// }) +// env. +// OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "foo"}). 
+// Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { +// assert.Equal(t, count, 1) +// count += 1 +// return &sync_activity.SyncResponse{}, nil +// }) + +// env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) + +// assert.True(t, env.IsWorkflowCompleted()) +// assert.Equal(t, count, 2) + +// err := env.GetWorkflowError() +// assert.Nil(t, err) + +// result := &WorkflowResponse{} +// err = env.GetWorkflowResult(result) +// assert.Nil(t, err) +// assert.Equal(t, result, &WorkflowResponse{}) + +// env.AssertExpectations(t) +// } + +// func Test_Workflow_Follows_Multiple_Dependents(t *testing.T) { +// testSuite := &testsuite.WorkflowTestSuite{} +// env := testSuite.NewTestWorkflowEnvironment() + +// var accStatsActivity *accountstatus_activity.Activity +// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). +// Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) +// var genact *genbenthosconfigs_activity.Activity +// env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). +// Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{ +// { +// Name: "public.users", +// DependsOn: []*tabledependency.DependsOn{}, +// TableSchema: "public", +// TableName: "users", +// Columns: []string{"id"}, +// Config: &neosync_benthos.BenthosConfig{ +// StreamConfig: neosync_benthos.StreamConfig{ +// Input: &neosync_benthos.InputConfig{ +// Inputs: neosync_benthos.Inputs{ +// SqlSelect: &neosync_benthos.SqlSelect{ +// Columns: []string{"id"}, +// }, +// }, +// }, +// }, +// }, +// }, +// { +// Name: "public.accounts", +// DependsOn: []*tabledependency.DependsOn{}, +// Columns: []string{"id"}, +// TableSchema: "public", +// TableName: "accounts", +// Config: &neosync_benthos.BenthosConfig{ +// StreamConfig: neosync_benthos.StreamConfig{ +// Input: &neosync_benthos.InputConfig{ +// Inputs: neosync_benthos.Inputs{ +// SqlSelect: &neosync_benthos.SqlSelect{ +// Columns: []string{"id"}, +// }, +// }, +// }, +// }, +// }, +// }, +// { +// Name: "public.foo", +// DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}}, +// Columns: []string{"id"}, +// TableSchema: "public", +// TableName: "foo", +// Config: &neosync_benthos.BenthosConfig{ +// StreamConfig: neosync_benthos.StreamConfig{ +// Input: &neosync_benthos.InputConfig{ +// Inputs: neosync_benthos.Inputs{ +// SqlSelect: &neosync_benthos.SqlSelect{ +// Columns: []string{"id"}, +// }, +// }, +// }, +// }, +// }, +// }, +// }}, nil) +// var activityOpts *syncactivityopts_activity.Activity +// env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything). +// Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{ +// SyncActivityOptions: &workflow.ActivityOptions{ +// StartToCloseTimeout: time.Minute, +// }, +// }, nil) +// var sqlInitActivity *runsqlinittablestmts_activity.Activity +// env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything). +// Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil) +// counter := atomic.NewInt32(0) +// syncActivity := sync_activity.Activity{} +// env. +// OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}). 
+// Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { +// counter.Add(1) +// return &sync_activity.SyncResponse{}, nil +// }) +// env. +// OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}). +// Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { +// counter.Add(1) +// return &sync_activity.SyncResponse{}, nil +// }) +// env. +// OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "foo"}). +// Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) { +// assert.Equal(t, counter.Load(), int32(2)) +// counter.Add(1) +// return &sync_activity.SyncResponse{}, nil +// }) + +// env.ExecuteWorkflow(Workflow, &WorkflowRequest{}) + +// assert.True(t, env.IsWorkflowCompleted()) +// assert.Equal(t, counter.Load(), int32(3)) + +// err := env.GetWorkflowError() +// assert.Nil(t, err) + +// result := &WorkflowResponse{} +// err = env.GetWorkflowResult(result) +// assert.Nil(t, err) +// assert.Equal(t, result, &WorkflowResponse{}) + +// env.AssertExpectations(t) +// } + +// func Test_Workflow_Follows_Multiple_Dependent_Redis_Cleanup(t *testing.T) { +// testSuite := &testsuite.WorkflowTestSuite{} +// env := testSuite.NewTestWorkflowEnvironment() + +// var accStatsActivity *accountstatus_activity.Activity +// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything). +// Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil) +// var genact *genbenthosconfigs_activity.Activity +// env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything). 
+//   Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{
+//     {
+//       Name: "public.users",
+//       DependsOn: []*tabledependency.DependsOn{},
+//       TableSchema: "public",
+//       TableName: "users",
+//       Columns: []string{"id"},
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//       RedisConfig: []*genbenthosconfigs_activity.BenthosRedisConfig{
+//         {
+//           Key: "fake-redis-key",
+//           Table: "public.users",
+//           Column: "id",
+//         },
+//       },
+//     },
+//     {
+//       Name: "public.accounts",
+//       DependsOn: []*tabledependency.DependsOn{},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "accounts",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//       RedisConfig: []*genbenthosconfigs_activity.BenthosRedisConfig{
+//         {
+//           Key: "fake-redis-key2",
+//           Table: "public.accounts",
+//           Column: "id",
+//         },
+//       },
+//     },
+//     {
+//       Name: "public.foo",
+//       DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "foo",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//     },
+//   }}, nil)
+// var activityOpts *syncactivityopts_activity.Activity
+// env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything).
+//   Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{
+//     SyncActivityOptions: &workflow.ActivityOptions{
+//       StartToCloseTimeout: time.Minute,
+//     },
+//   }, nil)
+// var sqlInitActivity *runsqlinittablestmts_activity.Activity
+// env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything).
+//   Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil)
+// counter := atomic.NewInt32(0)
+// syncActivities := &sync_activity.Activity{}
+// env.
+//   OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}).
+//   Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) {
+//     counter.Add(1)
+//     return &sync_activity.SyncResponse{}, nil
+//   })
+// env.
+//   OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}).
+//   Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) {
+//     counter.Add(1)
+//     return &sync_activity.SyncResponse{}, nil
+//   })
+// env.
+//   OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "foo"}).
+//   Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) {
+//     assert.Equal(t, counter.Load(), int32(2))
+//     counter.Add(1)
+//     return &sync_activity.SyncResponse{}, nil
+//   })
+
+// env.OnActivity(syncrediscleanup_activity.DeleteRedisHash, mock.Anything, mock.Anything).
+//   Return(&syncrediscleanup_activity.DeleteRedisHashResponse{}, nil)
+// env.OnActivity(syncrediscleanup_activity.DeleteRedisHash, mock.Anything, mock.Anything).
+//   Return(&syncrediscleanup_activity.DeleteRedisHashResponse{}, nil)
+
+// env.ExecuteWorkflow(Workflow, &WorkflowRequest{})
+
+// assert.True(t, env.IsWorkflowCompleted())
+// assert.Equal(t, counter.Load(), int32(3))
+
+// err := env.GetWorkflowError()
+// assert.Nil(t, err)
+
+// result := &WorkflowResponse{}
+// err = env.GetWorkflowResult(result)
+// assert.Nil(t, err)
+// assert.Equal(t, result, &WorkflowResponse{})
+
+// env.AssertExpectations(t)
+// }
+
+// func Test_Workflow_Halts_Activities_OnError(t *testing.T) {
+// testSuite := &testsuite.WorkflowTestSuite{}
+// env := testSuite.NewTestWorkflowEnvironment()
+
+// var genact *genbenthosconfigs_activity.Activity
+// env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything).
+//   Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{
+//     {
+//       Name: "public.users",
+//       DependsOn: []*tabledependency.DependsOn{},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "users",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//     },
+//     {
+//       Name: "public.accounts",
+//       DependsOn: []*tabledependency.DependsOn{},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "accounts",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//     },
+//     {
+//       Name: "public.foo",
+//       DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "foo",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//     },
+//   }}, nil)
+// var sqlInitActivity *runsqlinittablestmts_activity.Activity
+// env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything).
+//   Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil)
+// var activityOpts *syncactivityopts_activity.Activity
+// env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything).
+//   Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{
+//     SyncActivityOptions: &workflow.ActivityOptions{
+//       StartToCloseTimeout: time.Minute,
+//     },
+//   }, nil)
+// var accStatsActivity *accountstatus_activity.Activity
+// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything).
+//   Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil)
+
+// syncActivity := sync_activity.Activity{}
+// env.
+//   OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}).
+//   Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) {
+//     return &sync_activity.SyncResponse{}, nil
+//   })
+// env.
+//   OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}).
+//   Return(nil, errors.New("TestFailure"))
+
+// env.ExecuteWorkflow(Workflow, &WorkflowRequest{})
+
+// require.True(t, env.IsWorkflowCompleted())
+
+// err := env.GetWorkflowError()
+// require.Error(t, err)
+// var applicationErr *temporal.ApplicationError
+// require.True(t, errors.As(err, &applicationErr))
+// require.Equal(t, "TestFailure", applicationErr.Error())
+
+// env.AssertExpectations(t)
+// }
+
+// func Test_Workflow_Halts_Activities_On_InvalidAccountStatus(t *testing.T) {
+// testSuite := &testsuite.WorkflowTestSuite{}
+// env := testSuite.NewTestWorkflowEnvironment()
+
+// var genact *genbenthosconfigs_activity.Activity
+// env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything).
+//   Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{
+//     {
+//       Name: "public.users",
+//       DependsOn: []*tabledependency.DependsOn{},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "users",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//     },
+//     {
+//       Name: "public.accounts",
+//       DependsOn: []*tabledependency.DependsOn{},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "accounts",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//     },
+//     {
+//       Name: "public.foo",
+//       DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "foo",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//     },
+//   }}, nil)
+// var sqlInitActivity *runsqlinittablestmts_activity.Activity
+// env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything).
+//   Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil)
+// var activityOpts *syncactivityopts_activity.Activity
+// env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything).
+//   Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{
+//     SyncActivityOptions: &workflow.ActivityOptions{
+//       StartToCloseTimeout: time.Minute,
+//     },
+//   }, nil)
+
+// var accStatsActivity *accountstatus_activity.Activity
+// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything).
+//   Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true, ShouldPoll: true}, nil).Once()
+// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything).
+//   Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: false}, nil).Once()
+
+// syncActivity := sync_activity.Activity{}
+// env.
+//   OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}).
+//   Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) {
+//     return &sync_activity.SyncResponse{}, nil
+//   })
+// env.
+//   OnActivity(syncActivity.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}).
+//   Return(nil, errors.New("AccountTestFailure"))
+
+// env.ExecuteWorkflow(Workflow, &WorkflowRequest{})
+
+// require.True(t, env.IsWorkflowCompleted())
+
+// err := env.GetWorkflowError()
+// require.Error(t, err)
+// var applicationErr *temporal.ApplicationError
+// require.True(t, errors.As(err, &applicationErr))
+// require.ErrorContains(t, applicationErr, invalidAccountStatusError.Error())
+
+// env.AssertExpectations(t)
+// }
+
+// func Test_Workflow_Cleans_Up_Redis_OnError(t *testing.T) {
+// testSuite := &testsuite.WorkflowTestSuite{}
+// env := testSuite.NewTestWorkflowEnvironment()
+
+// var genact *genbenthosconfigs_activity.Activity
+// env.OnActivity(genact.GenerateBenthosConfigs, mock.Anything, mock.Anything).
+//   Return(&genbenthosconfigs_activity.GenerateBenthosConfigsResponse{BenthosConfigs: []*genbenthosconfigs_activity.BenthosConfigResponse{
+//     {
+//       Name: "public.users",
+//       DependsOn: []*tabledependency.DependsOn{},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "users",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//       RedisConfig: []*genbenthosconfigs_activity.BenthosRedisConfig{
+//         {
+//           Key: "fake-redis-key",
+//           Table: "public.users",
+//           Column: "id",
+//         },
+//       },
+//     },
+//     {
+//       Name: "public.accounts",
+//       DependsOn: []*tabledependency.DependsOn{},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "accounts",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//     },
+//     {
+//       Name: "public.foo",
+//       DependsOn: []*tabledependency.DependsOn{{Table: "public.users", Columns: []string{"id"}}, {Table: "public.accounts", Columns: []string{"id"}}},
+//       Columns: []string{"id"},
+//       TableSchema: "public",
+//       TableName: "foo",
+//       Config: &neosync_benthos.BenthosConfig{
+//         StreamConfig: neosync_benthos.StreamConfig{
+//           Input: &neosync_benthos.InputConfig{
+//             Inputs: neosync_benthos.Inputs{
+//               SqlSelect: &neosync_benthos.SqlSelect{
+//                 Columns: []string{"id"},
+//               },
+//             },
+//           },
+//         },
+//       },
+//     },
+//   }}, nil)
+// var sqlInitActivity *runsqlinittablestmts_activity.Activity
+// env.OnActivity(sqlInitActivity.RunSqlInitTableStatements, mock.Anything, mock.Anything).
+//   Return(&runsqlinittablestmts_activity.RunSqlInitTableStatementsResponse{}, nil)
+// var activityOpts *syncactivityopts_activity.Activity
+// env.OnActivity(activityOpts.RetrieveActivityOptions, mock.Anything, mock.Anything).
+//   Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{
+//     SyncActivityOptions: &workflow.ActivityOptions{
+//       StartToCloseTimeout: time.Minute,
+//     },
+//   }, nil)
+// var accStatsActivity *accountstatus_activity.Activity
+// env.OnActivity(accStatsActivity.CheckAccountStatus, mock.Anything, mock.Anything).
+//   Return(&accountstatus_activity.CheckAccountStatusResponse{IsValid: true}, nil)
+
+// syncActivities := &sync_activity.Activity{}
+// env.
+//   OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "users"}).
+//   Return(func(ctx context.Context, req *sync_activity.SyncRequest, metadata *sync_activity.SyncMetadata) (*sync_activity.SyncResponse, error) {
+//     return &sync_activity.SyncResponse{}, nil
+//   })
+// env.
+//   OnActivity(syncActivities.Sync, mock.Anything, mock.Anything, &sync_activity.SyncMetadata{Schema: "public", Table: "accounts"}, mock.Anything).
+//   Return(nil, errors.New("TestFailure"))
+
+// env.OnActivity(syncrediscleanup_activity.DeleteRedisHash, mock.Anything, mock.Anything).
+//   Return(&syncrediscleanup_activity.DeleteRedisHashResponse{}, nil)
+
+// env.ExecuteWorkflow(Workflow, &WorkflowRequest{})
+
+// assert.True(t, env.IsWorkflowCompleted())
+
+// err := env.GetWorkflowError()
+// assert.Error(t, err)
+// var applicationErr *temporal.ApplicationError
+// assert.True(t, errors.As(err, &applicationErr))
+// assert.Equal(t, "TestFailure", applicationErr.Error())
+
+// env.AssertExpectations(t)
+// }
+// func Test_isConfigReady(t *testing.T) {
+// isReady, err := isConfigReady(nil, nil)
+// assert.NoError(t, err)
+// assert.False(t, isReady, "config is nil")
+
+// isReady, err = isConfigReady(&genbenthosconfigs_activity.BenthosConfigResponse{
+//   Name: "foo",
+//   DependsOn: []*tabledependency.DependsOn{},
+// },
+//   nil)
+// assert.NoError(t, err)
+// assert.True(
+//   t,
+//   isReady,
+//   "has no dependencies",
+// )
+
+// completed := sync.Map{}
+// completed.Store("bar", []string{"id"})
+// isReady, err = isConfigReady(&genbenthosconfigs_activity.BenthosConfigResponse{
+//   Name: "foo",
+//   DependsOn: []*tabledependency.DependsOn{{Table: "bar", Columns: []string{"id"}}, {Table: "baz", Columns: []string{"id"}}},
+// },
+//   &completed)
+// assert.NoError(t, err)
+// assert.False(
+//   t,
+//   isReady,
+//   "not all dependencies are finished",
+// )
+
+// completed = sync.Map{}
+// completed.Store("bar", []string{"id"})
+// completed.Store("baz", []string{"id"})
+// isReady, err = isConfigReady(&genbenthosconfigs_activity.BenthosConfigResponse{
+//   Name: "foo",
+//   DependsOn: []*tabledependency.DependsOn{{Table: "bar", Columns: []string{"id"}}, {Table: "baz", Columns: []string{"id"}}},
+// }, &completed)
+// assert.NoError(t, err)
+// assert.True(
+//   t,
+//   isReady,
+//   "all dependencies are finished",
+// )
+
+// completed = sync.Map{}
+// completed.Store("bar", []string{"id"})
+// isReady, err = isConfigReady(&genbenthosconfigs_activity.BenthosConfigResponse{
+//   Name: "foo",
+//   DependsOn: []*tabledependency.DependsOn{{Table: "bar", Columns: []string{"id", "f_id"}}},
+// },
+//   &completed)
+// assert.NoError(t, err)
+// assert.False(
+//   t,
+//   isReady,
+//   "not all dependencies columns are finished",
+// )
+// }
+
+// func Test_updateCompletedMap(t *testing.T) {
+// completedMap := sync.Map{}
+// table := "public.users"
+// cols := []string{"id"}
+// err := updateCompletedMap(table, &completedMap, cols)
+// assert.NoError(t, err)
+// val, loaded := completedMap.Load(table)
+// assert.True(t, loaded)
+// assert.Equal(t, cols, val)
+
+// completedMap = sync.Map{}
+// table = "public.users"
+// completedMap.Store(table, []string{"name"})
+// err = updateCompletedMap(table, &completedMap, []string{"id"})
+// assert.NoError(t, err)
+// val, loaded = completedMap.Load(table)
+// assert.True(t, loaded)
+// assert.Equal(t, []string{"name", "id"}, val)
+// }
+
+// func Test_isReadyForCleanUp(t *testing.T) {
+// assert.True(t, isReadyForCleanUp("", "", nil), "no dependencies")
+
+// assert.False(
+//   t,
+//   isReadyForCleanUp(
+//     "table",
+//     "col",
+//     map[string]map[string][]string{
+//       "other_table": {"table": []string{"col"}},
+//     },
+//   ),
+//   "has dependency",
+// )
+
+// assert.True(
+//   t,
+//   isReadyForCleanUp(
+//     "table",
+//     "col",
+//     map[string]map[string][]string{
+//       "other_table": {"table": []string{"col1"}},
+//     },
+//   ),
+//   "no dependency",
+// )
+// }
+
+// func Test_Workflow_Initial_AccountStatus(t *testing.T) {
+// testSuite := &testsuite.WorkflowTestSuite{}
+// env := testSuite.NewTestWorkflowEnvironment()
+
+// var activityOptsActivity *syncactivityopts_activity.Activity
+// env.OnActivity(activityOptsActivity.RetrieveActivityOptions, mock.Anything, mock.Anything).
+//   Return(&syncactivityopts_activity.RetrieveActivityOptionsResponse{
+//     AccountId: uuid.NewString(),
+//     RequestedRecordCount: shared.Ptr(uint64(4)),
+//   }, nil)
+
+// var checkStatusActivity *accountstatus_activity.Activity
+// env.OnActivity(checkStatusActivity.CheckAccountStatus, mock.Anything, mock.Anything).
+//   Return(&accountstatus_activity.CheckAccountStatusResponse{
+//     IsValid: false,
+//     Reason: shared.Ptr("test failure"),
+//   }, nil)
+
+// env.ExecuteWorkflow(Workflow, &WorkflowRequest{})
+
+// assert.True(t, env.IsWorkflowCompleted())
+
+// err := env.GetWorkflowError()
+// assert.Error(t, err)
+// var applicationErr *temporal.ApplicationError
+// assert.True(t, errors.As(err, &applicationErr))
+// assert.ErrorContains(t, applicationErr, invalidAccountStatusError.Error())
+
+// env.AssertExpectations(t)
+// }