diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0273992e8..babae380a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,629 +2,39 @@
For **1.x** release notes, please see [v1.x/CHANGELOG.md](https://github.com/awslabs/amazon-kinesis-client/blob/v1.x/CHANGELOG.md)
----
-### Release 2.6.0 (2024-05-01)
-* [#1317](https://github.com/awslabs/amazon-kinesis-client/pull/1317) Add enablePriorityLeaseAssignment config
-* [#1320](https://github.com/awslabs/amazon-kinesis-client/pull/1320) Update lease taker to get unassigned leases
-* [#1318](https://github.com/awslabs/amazon-kinesis-client/pull/1318) Internally construct and use stream ARNs for all streams in multi-stream mode
-* [#1291](https://github.com/awslabs/amazon-kinesis-client/pull/1291) Update RetrievalFactory implementations to utilize the StreamIdentifier field of StreamConfig
-* [#1308](https://github.com/awslabs/amazon-kinesis-client/pull/1308) Move shutdownComplete call to ShardConsumer
-* [#1313](https://github.com/awslabs/amazon-kinesis-client/pull/1313) Add additional integration tests for multi-stream and cross account access
-* [#1273](https://github.com/awslabs/amazon-kinesis-client/pull/1273) Optimize currentStreamConfigMap by cleaning up lingering streams
-* [#1302](https://github.com/awslabs/amazon-kinesis-client/pull/1302) Fix gracefulShutdown behavior in Scheduler
-
-### Release 2.5.8 (2024-03-27)
-* [#1278](https://github.com/awslabs/amazon-kinesis-client/pull/1278) Upgrade awssdk.version from 2.25.3 to 2.25.11
-* [#1279](https://github.com/awslabs/amazon-kinesis-client/pull/1279) Upgrade org.apache.maven.plugins:maven-gpg-plugin from 3.1.0 to 3.2.1
-* [#1280](https://github.com/awslabs/amazon-kinesis-client/pull/1280) Upgrade org.apache.commons:commons-lang3 from 3.12.0 to 3.14.0
-* [#1282](https://github.com/awslabs/amazon-kinesis-client/pull/1282) Upgrade org.apache.maven.plugins:maven-javadoc-plugin from 3.5.0 to 3.6.3
-* [#1277](https://github.com/awslabs/amazon-kinesis-client/pull/1277) Reuse 'ShardSyncTaskManager' instance for existing stream to avoid duplicate enqueue of 'ShardSyncTask'
-
-### Release 2.5.7 (2024-03-19)
-* [#1275](https://github.com/awslabs/amazon-kinesis-client/pull/1275) Update PollingConfig maxRecords method to return PollingConfig
-* [#1236](https://github.com/awslabs/amazon-kinesis-client/pull/1236) Upgrade commons-io:commons-io from 2.11.0 to 2.15.1
-* [#1189](https://github.com/awslabs/amazon-kinesis-client/pull/1189) Upgrade org.apache.maven.plugins:maven-resources-plugin from 3.3.0 to 3.3.1
-* [#1139](https://github.com/awslabs/amazon-kinesis-client/pull/1139) Upgrade maven-surefire-plugin from 2.22.2 to 3.1.2
-* [#1138](https://github.com/awslabs/amazon-kinesis-client/pull/1138) Upgrade maven-failsafe-plugin from 2.22.2 to 3.1.2
-* [#1125](https://github.com/awslabs/amazon-kinesis-client/pull/1125) Upgrade maven-gpg-plugin from 3.0.1 to 3.1.0
-
-### Release 2.5.6 (2024-03-08)
-* [#1271](https://github.com/awslabs/amazon-kinesis-client/pull/1271) Adding snapshot for 2.5.6-SNAPSHOT
-* [#1268](https://github.com/awslabs/amazon-kinesis-client/pull/1268) Upgrade ch.qos.logback:logback-classic dependency from 1.3.12 to 1.3.14
-* [#1268](https://github.com/awslabs/amazon-kinesis-client/pull/1268) Upgrade awssdk.version from 2.20.43 to 2.25.3
-* [#1268](https://github.com/awslabs/amazon-kinesis-client/pull/1268) Upgrade aws-java-sdk.version from 1.12.405 to 1.12.668
-* [#1268](https://github.com/awslabs/amazon-kinesis-client/pull/1268) Upgrade gsr.version from 1.1.17 to 1.1.19
-
-### Release 2.5.5 (2024-02-22)
-* [#1257](https://github.com/awslabs/amazon-kinesis-client/pull/1257) Prevent improper error logging during worker shutdown
-* [#1260](https://github.com/awslabs/amazon-kinesis-client/pull/1260) Add Deletion protection config
-* [#1258](https://github.com/awslabs/amazon-kinesis-client/pull/1258) Fix issue in configuring metricsEnabledDimensions
-* [#1259](https://github.com/awslabs/amazon-kinesis-client/pull/1259) Add snapshot to version
-
-### Release 2.5.4 (December 12, 2023)
-* [#1232](https://github.com/awslabs/amazon-kinesis-client/pull/1232) Upgrade ch.qos.logback:logback-classic dependency from 1.3.0 to 1.3.12 in /amazon-kinesis-client
-* [#1233](https://github.com/awslabs/amazon-kinesis-client/pull/1233) Upgrade ch.qos.logback:logback-classic dependency from 1.3.0 to 1.3.12 in /amazon-kinesis-client-multilang
-* [#1230](https://github.com/awslabs/amazon-kinesis-client/pull/1230) Bug fix which now allows MultiLangDaemon to configure idleTimeBetweenReadsInMillis
-* [#1229](https://github.com/awslabs/amazon-kinesis-client/pull/1229) Added link to `javadoc.io`-hosted Javadoc in the README
-* [#1218](https://github.com/awslabs/amazon-kinesis-client/pull/1218) Added doc for leases and the lease lifecycle to help explain lease lifecycle logic.
-* [#1226](https://github.com/awslabs/amazon-kinesis-client/pull/1226) Upgraded KCL from 2.5.3 to 2.5.4-SNAPSHOT
-
-### Release 2.5.3 (November 8, 2023)
-* [#1219](https://github.com/awslabs/amazon-kinesis-client/pull/1219) Provided streamArn in getRecords request
-* [#1216](https://github.com/awslabs/amazon-kinesis-client/pull/1216) Updated AWS Glue Schema Registry from version 1.1.14 to 1.1.17.
-* [#1205](https://github.com/awslabs/amazon-kinesis-client/pull/1205) Updated the FAQ with impact of changing default checkpoint.
-* [#1203](https://github.com/awslabs/amazon-kinesis-client/pull/1203) Added links from README.md to FAQ and doc folder.
-* [#1202](https://github.com/awslabs/amazon-kinesis-client/pull/1202) Introduced a FAQ for Kinesis Client Library
-* [#1200](https://github.com/awslabs/amazon-kinesis-client/pull/1200) Added test case for StreamIdentifier serialization.
-
-### Release 2.5.2 (August 7, 2023)
-* [#1184](https://github.com/awslabs/amazon-kinesis-client/pull/1184) [#367] Enhanced multi-lang `AWSCredentialsProvider=...` decoder and c…
-* [#1186](https://github.com/awslabs/amazon-kinesis-client/pull/1186) Provided documentation for multilang's new NestedPropertyKey enhancement.
-* [#1181](https://github.com/awslabs/amazon-kinesis-client/pull/1181) CVE-2023-2976: Upgrade Google Guava dependency version from `32.0.0-jre` to `32.1.1-jre`
-* [#1159](https://github.com/awslabs/amazon-kinesis-client/pull/1159) Bug fix in lease refresher integration test with occasional failures
-* [#1157](https://github.com/awslabs/amazon-kinesis-client/pull/1157) Fix NPE on graceful shutdown before DDB `LeaseCoordinator` starts.
-* [#1152](https://github.com/awslabs/amazon-kinesis-client/pull/1152) Adding resharding integration tests and changing ITs to not run by default
-* [#1162](https://github.com/awslabs/amazon-kinesis-client/pull/1162) Only deleting resource created by ITs
-* [#1158](https://github.com/awslabs/amazon-kinesis-client/pull/1158) Checkstyle: tightened `LineLength` restriction from 170 to 150.
-* [#1151](https://github.com/awslabs/amazon-kinesis-client/pull/1151) Modified `dependabot.yml` to set the correct `v[1|2].x` label.
-* [#1164](https://github.com/awslabs/amazon-kinesis-client/pull/1164) Upgraded KCL Version from 2.5.1 to 2.5.2-SNAPSHOT
-
-### Release 2.5.1 (June 27, 2023)
-* [#1143](https://github.com/awslabs/amazon-kinesis-client/pull/1143) Upgrade MultiLangDaemon to support StreamARN
-* [#1145](https://github.com/awslabs/amazon-kinesis-client/pull/1145) Introduced GitHub actions to trigger Maven builds during merge/pull requests
-* [#1136](https://github.com/awslabs/amazon-kinesis-client/pull/1136) Added testing architecture and KCL 2.x basic polling/streaming tests
-* [#1153](https://github.com/awslabs/amazon-kinesis-client/pull/1153) Checkstyle: added `UnusedImports` check.
-* [#1150](https://github.com/awslabs/amazon-kinesis-client/pull/1150) Enabled Checkstyle validation of test resources.
-* [#1149](https://github.com/awslabs/amazon-kinesis-client/pull/1149) Bound Checkstyle to `validate` goal for automated enforcement.
-* [#1148](https://github.com/awslabs/amazon-kinesis-client/pull/1148) Code cleanup to facilitate Checkstyle enforcement.
-* [#1142](https://github.com/awslabs/amazon-kinesis-client/pull/1142) Upgrade Google Guava dependency version from 31.1-jre to 32.0.0-jre
-* [#1115](https://github.com/awslabs/amazon-kinesis-client/pull/1115) Update KCL version from 2.5.0 to 2.5.1-SNAPSHOT
-
-### Release 2.5.0 (May 19, 2023)
-* **[#1109](https://github.com/awslabs/amazon-kinesis-client/pull/1109) Add support for stream ARNs**
-* **[#1065](https://github.com/awslabs/amazon-kinesis-client/pull/1065) Allow tags to be added when lease table is created**
-* [#1094](https://github.com/awslabs/amazon-kinesis-client/pull/1094) Code cleanup to introduce better testing
-* [#1088](https://github.com/awslabs/amazon-kinesis-client/pull/1088) Minimize race in PSSM to optimize shard sync calls
-* [#1086](https://github.com/awslabs/amazon-kinesis-client/pull/1086) Add additional SingleStreamTracker constructor with stream position parameter
-* [#1084](https://github.com/awslabs/amazon-kinesis-client/pull/1084) More consistent testing behavior with restartAfterRequestTimerExpires
-* [#1066](https://github.com/awslabs/amazon-kinesis-client/pull/1066) More consistent testing behavior with HashRangesAreAlwaysComplete
-* [#1072](https://github.com/awslabs/amazon-kinesis-client/pull/1072) Upgrade nexus-staging-maven-plugin from 1.6.8 to 1.6.13
-* [#1073](https://github.com/awslabs/amazon-kinesis-client/pull/1073) Upgrade slf4j-api from 2.0.6 to 2.0.7
-* [#1090](https://github.com/awslabs/amazon-kinesis-client/pull/1090) Upgrade awssdk.version from 2.20.8 to 2.20.43
-* [#1071](https://github.com/awslabs/amazon-kinesis-client/pull/1071) Upgrade maven-compiler-plugin from 3.8.1 to 3.11.0
-
-### Release 2.4.8 (March 21, 2023)
-* [#1080](https://github.com/awslabs/amazon-kinesis-client/pull/1080) Added metric in `ShutdownTask` for scenario when parent leases are missing.
-* [#1077](https://github.com/awslabs/amazon-kinesis-client/pull/1077) Reverted changes to pom property
-* [#1069](https://github.com/awslabs/amazon-kinesis-client/pull/1069) Fixed flaky InitializationWaitsWhenLeaseTableIsEmpty test
-
-
-### Release 2.4.7 (March 17, 2023)
-* **NOTE: Due to an issue during the release process, the 2.4.7 published artifacts are incomplete and non-viable. Please use 2.4.8 or later.**
-* [#1063](https://github.com/awslabs/amazon-kinesis-client/pull/1063) Allow leader to learn new leases upon re-election to avoid unnecessary shardSyncs
-* [#1060](https://github.com/awslabs/amazon-kinesis-client/pull/1060) Add new metric to be emitted on lease creation
-* [#1057](https://github.com/awslabs/amazon-kinesis-client/pull/1057) Added more logging in `Scheduler` w.r.t. `StreamConfig`s.
-* [#1059](https://github.com/awslabs/amazon-kinesis-client/pull/1059) DRY: simplification of `HierarchicalShardSyncerTest`.
-* [#1062](https://github.com/awslabs/amazon-kinesis-client/pull/1062) Fixed retry storm in `PrefetchRecordsPublisher`.
-* [#1061](https://github.com/awslabs/amazon-kinesis-client/pull/1061) Fixed NPE in `LeaseCleanupManager`.
-* [#1056](https://github.com/awslabs/amazon-kinesis-client/pull/1056) Clean up in-memory state of deleted kinesis stream in MultiStreamMode
-* [#1058](https://github.com/awslabs/amazon-kinesis-client/pull/1058) Documentation: added `<pre>` tags so fixed-format diagrams aren't garbled.
-* [#1053](https://github.com/awslabs/amazon-kinesis-client/pull/1053) Exposed convenience method of `ExtendedSequenceNumber#isSentinelCheckpoint()`
-* [#1043](https://github.com/awslabs/amazon-kinesis-client/pull/1043) Removed a `.swp` file, and updated `.gitignore`.
-* [#1047](https://github.com/awslabs/amazon-kinesis-client/pull/1047) Upgrade awssdk.version from 2.19.31 to 2.20.8
-* [#1046](https://github.com/awslabs/amazon-kinesis-client/pull/1046) Upgrade maven-javadoc-plugin from 3.3.1 to 3.5.0
-* [#1038](https://github.com/awslabs/amazon-kinesis-client/pull/1038) Upgrade gsr.version from 1.1.13 to 1.1.14
-* [#1037](https://github.com/awslabs/amazon-kinesis-client/pull/1037) Upgrade aws-java-sdk.version from 1.12.370 to 1.12.405
-
-### Release 2.4.6 (February 21, 2023)
-* [#1041](https://github.com/awslabs/amazon-kinesis-client/pull/1041) Minor optimizations (e.g., calculate-once, put instead of get+put)
-* [#1035](https://github.com/awslabs/amazon-kinesis-client/pull/1035) Release Note updates to avoid duplication and bitrot (e.g., 1.x release
-* [#935](https://github.com/awslabs/amazon-kinesis-client/pull/935) Pass isAtShardEnd correctly to processRecords call
-* [#1040](https://github.com/awslabs/amazon-kinesis-client/pull/1040) Increased logging verbosity around lease management
-* [#1024](https://github.com/awslabs/amazon-kinesis-client/pull/1024) Added logging w.r.t. StreamConfig handling.
-* [#1034](https://github.com/awslabs/amazon-kinesis-client/pull/1034) Optimization: 9~15% improvement in KinesisDataFetcher wall-time
-* [#1045](https://github.com/awslabs/amazon-kinesis-client/pull/1045) Fixed duplication of project version in children pom.xml
-* [#956](https://github.com/awslabs/amazon-kinesis-client/pull/956) Fixed warning message typos
-* [#795](https://github.com/awslabs/amazon-kinesis-client/pull/795) Fixed log message spacing
-* [#740](https://github.com/awslabs/amazon-kinesis-client/pull/740) Fixed typo in Comment
-* [#1028](https://github.com/awslabs/amazon-kinesis-client/pull/1028) Refactored MultiStreamTracker to provide and enhance OOP for both
-* [#1027](https://github.com/awslabs/amazon-kinesis-client/pull/1027) Removed CHECKSTYLE:OFF toggles which can invite/obscure sub-par code.
-* [#1032](https://github.com/awslabs/amazon-kinesis-client/pull/1032) Upgrade rxjava from 3.1.5 to 3.1.6
-* [#1030](https://github.com/awslabs/amazon-kinesis-client/pull/1030) Upgrade awssdk.version from 2.19.2 to 2.19.31
-* [#1029](https://github.com/awslabs/amazon-kinesis-client/pull/1029) Upgrade slf4j-api from 2.0.0 to 2.0.6
-* [#1015](https://github.com/awslabs/amazon-kinesis-client/pull/1015) Upgrade protobuf-java from 3.21.5 to 3.21.12
-
-### Release 2.4.5 (January 04, 2023)
-* [#1014](https://github.com/awslabs/amazon-kinesis-client/pull/1014) Use AFTER_SEQUENCE_NUMBER iterator type for expired iterator request
-
-### Release 2.4.4 (December 23, 2022)
-* [#1017](https://github.com/awslabs/amazon-kinesis-client/pull/1017) Upgrade aws sdk
- * aws-java-sdk.version from 1.12.296 -> 1.12.370
- * awssdk.version from 2.17.268 -> 2.19.2
-* [#1020](https://github.com/awslabs/amazon-kinesis-client/pull/1020) Correct the KCL version in the main pom
-
-### Release 2.4.3 (September 6, 2022)
-* [#980](https://github.com/awslabs/amazon-kinesis-client/pull/980) logback-classic: 1.2.9 -> 1.4.0
-* [#983](https://github.com/awslabs/amazon-kinesis-client/pull/983)
- * protobuf-java: 3.19.2 -> 3.21.5
- * slf4j.version: 1.7.32 -> 2.0.0
- * schema-registry-serde: 1.1.9 -> 1.1.13
-* [#984](https://github.com/awslabs/amazon-kinesis-client/pull/984) awssdk.version from 2.17.108 to 2.17.267
-* [#987](https://github.com/awslabs/amazon-kinesis-client/pull/987) guava: 31.0.1-jre -> 31.1-jre
-* [#988](https://github.com/awslabs/amazon-kinesis-client/pull/988) jcommander: 1.81 to 1.82
-* [#990](https://github.com/awslabs/amazon-kinesis-client/pull/990) Upgrade dependencies
- * aws-java-sdk.version: 1.12.130 -> 1.12.296
- * lombok: 1.18.22 -> 1.18.24
- * rxjava: 3.1.3 -> 3.1.5
- * maven-resources-plugin: 2.6 -> 3.3.0
- * logback-classic: 1.4.0 -> 1.3.0
- * awssdk.version: 2.17.267 -> 2.17.268
-
-### Release 2.4.2 (August 10, 2022)
-* [#972](https://github.com/awslabs/amazon-kinesis-client/pull/972) Upgrade Lombok to version 1.18.24
-
-### Release 2.4.1 (March 24, 2022)
-[Milestone#68](https://github.com/awslabs/amazon-kinesis-client/milestone/68)
-* [#916](https://github.com/awslabs/amazon-kinesis-client/pull/916) Upgrade to rxjava3
-
-### Release 2.4.0 (March 2, 2022)
-[Milestone#67](https://github.com/awslabs/amazon-kinesis-client/milestone/67)
-* [#894](https://github.com/awslabs/amazon-kinesis-client/pull/894) Bump protobuf-java from 3.19.1 to 3.19.2
-* [#924](https://github.com/awslabs/amazon-kinesis-client/pull/924) Support Protobuf Data format with Glue Schema Registry.
-
-### Release 2.3.10 (January 4, 2022)
-[Milestone#66](https://github.com/awslabs/amazon-kinesis-client/milestone/66)
-* [#868](https://github.com/awslabs/amazon-kinesis-client/pull/868) Adding a new metric: Application-level MillisBehindLatest
-* [#879](https://github.com/awslabs/amazon-kinesis-client/pull/879) Keep dependencies up-to-date
-* [#886](https://github.com/awslabs/amazon-kinesis-client/pull/886) Get latest counter before attempting a take to ensure take succeeds
-* [#888](https://github.com/awslabs/amazon-kinesis-client/pull/888) Configure dependabot for v1.x branch
-
-### Release 2.3.9 (November 22, 2021)
-[Milestone#65](https://github.com/awslabs/amazon-kinesis-client/milestone/65)
-* [#866](https://github.com/awslabs/amazon-kinesis-client/pull/866) Update logback dependency.
-
-### Release 2.3.8 (October 27, 2021)
-[Milestone#64](https://github.com/awslabs/amazon-kinesis-client/milestone/64)
-* [#860](https://github.com/awslabs/amazon-kinesis-client/pull/860) Upgrade Glue schema registry from 1.1.4 to 1.1.5.
-* [#861](https://github.com/awslabs/amazon-kinesis-client/pull/861) Revert [PR#847](https://github.com/awslabs/amazon-kinesis-client/pull/847) due to regression for leases without owners and added new tests.
-
-### Release 2.3.7 (October 11, 2021)
-[Milestone#63](https://github.com/awslabs/amazon-kinesis-client/milestone/63)
-* [#842](https://github.com/awslabs/amazon-kinesis-client/pull/842) Fixing typo in debug logs.
-* [#846](https://github.com/awslabs/amazon-kinesis-client/pull/846) Fix DynamoDBLeaseTaker logging of available leases
-* [#847](https://github.com/awslabs/amazon-kinesis-client/pull/847) Make use of Java 8 to simplify computeLeaseCounts()
-* [#853](https://github.com/awslabs/amazon-kinesis-client/pull/853) Add configurable initial position for orphaned stream
-* [#854](https://github.com/awslabs/amazon-kinesis-client/pull/854) Create DynamoDB tables on On-Demand billing mode by default.
-* [#855](https://github.com/awslabs/amazon-kinesis-client/pull/855) Emit Glue Schema Registry usage metrics
-* [#857](https://github.com/awslabs/amazon-kinesis-client/pull/857) Fix to shutdown PrefetchRecordsPublisher in graceful manner
-* [#858](https://github.com/awslabs/amazon-kinesis-client/pull/858) Upgrade AWS SDK version to 2.17.52.
-
-### Release 2.3.6 (July 9, 2021)
-[Milestone#62](https://github.com/awslabs/amazon-kinesis-client/milestone/62)
-* [#836](https://github.com/awslabs/amazon-kinesis-client/pull/836) Upgraded AWS SDK version to 2.16.98
-* [#835](https://github.com/awslabs/amazon-kinesis-client/pull/835) Upgraded Glue Schema Registry version to 1.1.1
-* [#828](https://github.com/awslabs/amazon-kinesis-client/pull/828) Modified wildcard imports to individual imports.
-* [#817](https://github.com/awslabs/amazon-kinesis-client/pull/817) Updated the Worker shutdown logic to make sure that the `LeaseCleanupManager` also terminates all the threads that it has started.
-* [#794](https://github.com/awslabs/amazon-kinesis-client/pull/794) Silence warning when there are no stale streams to delete.
-
-### Release 2.3.5 (June 14, 2021)
-[Milestone#59](https://github.com/awslabs/amazon-kinesis-client/milestone/59)
-* [#824](https://github.com/awslabs/amazon-kinesis-client/pull/824) Upgraded dependencies
- * logback-classic version to 1.2.3
- * AWS Java SDK version to 1.12.3
- * AWS SDK version to 2.16.81
-* [#815](https://github.com/awslabs/amazon-kinesis-client/pull/815) Converted Future to CompletableFuture which helps in proper conversion to Scala using Scala Future Converters.
-* [#810](https://github.com/awslabs/amazon-kinesis-client/pull/810) Bump commons-io from 2.6 to 2.7
-* [#804](https://github.com/awslabs/amazon-kinesis-client/pull/804) Allowing user to specify an initial timestamp in which daemon will process records.
-* [#802](https://github.com/awslabs/amazon-kinesis-client/pull/802) Upgraded guava from 26.0-jre to 29.0-jre
-* [#801](https://github.com/awslabs/amazon-kinesis-client/pull/801) Fixing a bug that causes to block indefinitely when trying to unlock a lock that isn't locked.
-* [#762](https://github.com/awslabs/amazon-kinesis-client/pull/762) Added support for web identity token in multilang
-
-### Release 2.3.4 (February 19, 2021)
-[Milestone#56](https://github.com/awslabs/amazon-kinesis-client/milestone/56)
-* [#788](https://github.com/awslabs/amazon-kinesis-client/pull/788) Fixing a bug that caused paginated `ListShards` calls with the `ShardFilter` parameter to fail when the lease table was being initialized.
-
-### Release 2.3.3 (December 23, 2020)
-[Milestone#55](https://github.com/awslabs/amazon-kinesis-client/milestone/55)
-* Fixing bug in PrefetchRecordsPublisher which was causing retry storms if initial request fails.
-* Fixing bug where idleTimeBetweenReadsInMillis property was ignored in PollingConfig.
-
-### Release 2.3.2 (November 19, 2020)
-[Milestone#54](https://github.com/awslabs/amazon-kinesis-client/milestone/54)
-* Adding support for Glue Schema Registry. Deserialize and read schemas associated with the records.
-* Updating AWS SDK version to 2.15.31
-
-
-### Release 2.3.1 (October 20, 2020)
-[Milestone#53](https://github.com/awslabs/amazon-kinesis-client/milestone/53)
-* Introducing support for processing multiple kinesis data streams with the same KCL 2.x for java consumer application
- * To build a consumer application that can process multiple streams at the same time, you must implement a new
- interface called MultistreamTracker (https://github.com/awslabs/amazon-kinesis-client/blob/0c5042dadf794fe988438436252a5a8fe70b6b0b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/processor/MultiStreamTracker.java)
-
- * MultistreamTracker will also publish various metrics around the current active streams being processed, the number
- of streams which are deleted at this time period or are pending deletion.
-
-
-### Release 2.3.0 (August 17, 2020)
-* [Milestone#52](https://github.com/awslabs/amazon-kinesis-client/milestones/52)
-
-* Behavior of shard synchronization is moving from each worker independently learning about all existing shards to workers only discovering the children of shards that each worker owns. This optimizes memory usage, lease table IOPS usage, and number of calls made to kinesis for streams with high shard counts and/or frequent resharding.
-* When bootstrapping an empty lease table, KCL utilizes the `ListShard` API's filtering option (the ShardFilter optional request parameter) to retrieve and create leases only for a snapshot of shards open at the time specified by the `ShardFilter` parameter. The `ShardFilter` parameter enables you to filter out the response of the `ListShards` API, using the `Type` parameter. KCL uses the `Type` filter parameter and the following of its valid values to identify and return a snapshot of open shards that might require new leases.
- * Currently, the following shard filters are supported:
- * `AT_TRIM_HORIZON` - the response includes all the shards that were open at `TRIM_HORIZON`.
- * `AT_LATEST` - the response includes only the currently open shards of the data stream.
- * `AT_TIMESTAMP` - the response includes all shards whose start timestamp is less than or equal to the given timestamp and end timestamp is greater than or equal to the given timestamp or still open.
- * `ShardFilter` is used when creating leases for an empty lease table to initialize leases for a snapshot of shards specified at `RetrievalConfig#initialPositionInStreamExtended`.
- * For more information about ShardFilter, see the [official AWS documentation on ShardFilter](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_ShardFilter.html).
-
-* Introducing support for the `ChildShards` response of the `GetRecords` and the `SubscribeToShard` APIs to perform lease/shard synchronization that happens at `SHARD_END` for closed shards, allowing a KCL worker to only create leases for the child shards of the shard it finished processing.
- * For shared throughput consumer applications, this uses the `ChildShards` response of the `GetRecords` API. For dedicated throughput (enhanced fan-out) consumer applications, this uses the `ChildShards` response of the `SubscribeToShard` API.
- * For more information, see the official AWS Documentation on [GetRecords](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetRecords.html), [SubscribeToShard](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_SubscribeToShard.html), and [ChildShard](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_ChildShard.html).
-
-* KCL now also performs additional periodic shard/lease scans in order to identify any potential holes in the lease table to ensure the complete hash range of the stream is being processed and create leases for them if required. `PeriodicShardSyncManager` is the new component that is responsible for running periodic lease/shard scans.
- * New configuration options are available to configure `PeriodicShardSyncManager` in `LeaseManagementConfig`
-
- | Name | Default | Description |
- | ----------------------------------------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
- | leasesRecoveryAuditorExecutionFrequencyMillis | 120000 (2 minutes) | Frequency (in millis) of the auditor job to scan for partial leases in the lease table. If the auditor detects any hole in the leases for a stream, then it would trigger shard sync based on leasesRecoveryAuditorInconsistencyConfidenceThreshold. |
- | leasesRecoveryAuditorInconsistencyConfidenceThreshold | 3 | Confidence threshold for the periodic auditor job to determine if leases for a stream in the lease table is inconsistent. If the auditor finds same set of inconsistencies consecutively for a stream for this many times, then it would trigger a shard sync |
+For **2.x** release notes, please see [v2.x/CHANGELOG.md](https://github.com/awslabs/amazon-kinesis-client/blob/v2.x/CHANGELOG.md)
- * New CloudWatch metrics are also now emitted to monitor the health of `PeriodicShardSyncManager`:
-
- | Name | Description |
- | --------------------------- | ------------------------------------------------------ |
- | NumStreamsWithPartialLeases | Number of streams that had holes in their hash ranges. |
- | NumStreamsToSync | Number of streams which underwent a full shard sync. |
-
-* Introducing deferred lease cleanup. Leases will be deleted asynchronously by `LeaseCleanupManager` upon reaching `SHARD_END`, when a shard has either expired past the stream’s retention period or been closed as the result of a resharding operation.
- * New configuration options are available to configure `LeaseCleanupManager`.
-
- | Name | Default | Description |
- | ----------------------------------- | ---------- | --------------------------------------------------------------------------------------------------------- |
- | leaseCleanupIntervalMillis | 1 minute | Interval at which to run lease cleanup thread. |
- | completedLeaseCleanupIntervalMillis | 5 minutes | Interval at which to check if a lease is completed or not. |
- | garbageLeaseCleanupIntervalMillis | 30 minutes | Interval at which to check if a lease is garbage (i.e trimmed past the stream's retention period) or not. |
-
-* Introducing _experimental_ support for multistreaming, allowing a single KCL application to multiplex processing multiple streams.
- * New configuration options are available to enable multistreaming in `RetrievalConfig#appStreamTracker`.
-
-* Fixing a bug in `PrefetchRecordsPublisher` restarting while it was already running.
-* Including an optimization to `HierarchicalShardSyncer` to only create leases for one layer of shards.
-* Adding support to prepare and commit lease checkpoints with arbitrary bytes.
- * This allows checkpointing of an arbitrary byte buffer up to the maximum permitted DynamoDB item size ([currently 400 KB as of release](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html)), and can be used for recovery by passing a serialized byte buffer to `RecordProcessorCheckpointer#prepareCheckpoint` and `RecordProcessorCheckpointer#checkpoint`.
-* Upgrading version of AWS SDK to 2.14.0.
-* [#725](https://github.com/awslabs/amazon-kinesis-client/pull/725) Allowing KCL to consider lease tables in `UPDATING` healthy.
-
-### Release 2.2.11 (May 28, 2020)
-[Milestone#51](https://github.com/awslabs/amazon-kinesis-client/milestone/51)
-* Adjusting HTTP2 initial window size to 512 KB
- * [PR#706](https://github.com/awslabs/amazon-kinesis-client/pull/706)
-* Updating protobuf-java to version 3.11.4
- * [PR#718](https://github.com/awslabs/amazon-kinesis-client/pull/718)
-* Updating the AWS Java SDK to version 2.13.25
- * [PR#722](https://github.com/awslabs/amazon-kinesis-client/pull/722)
-
-### Release 2.2.10 (March 26, 2020)
-[Milestone#48](https://github.com/awslabs/amazon-kinesis-client/milestone/48)
-* Fixing a bug in DynamoDB billing mode support for special regions.
- * [PR#703](https://github.com/awslabs/amazon-kinesis-client/pull/703)
-* Adding request id logging to ShardConsumerSubscriber.
- * [PR#705](https://github.com/awslabs/amazon-kinesis-client/pull/705)
-
-### Release 2.2.9 (February 17, 2020)
-[Milestone#47](https://github.com/awslabs/amazon-kinesis-client/milestone/47)
-* Updating the AWS SDK version to 2.10.66.
- * [PR#687](https://github.com/awslabs/amazon-kinesis-client/commit/8aaf2aa11c43f77f459732cdb7d88f4418d367ff)
-* Adding request id logging to SubscribeToShard response.
- * [PR#678](https://github.com/awslabs/amazon-kinesis-client/pull/678)
-
-### Release 2.2.8 (January 28, 2020)
-[Milestone#46](https://github.com/awslabs/amazon-kinesis-client/milestone/46)
-* Updating the AWS SDK version to 2.10.56.
- * [PR#679](https://github.com/awslabs/amazon-kinesis-client/pull/679)
- * NOTE: SDK has a known connection teardown issue when multiple H2 streams are used within a connection. This might result in shard consumers sticking to a stale service host and not progressing. If your shard consumer gets stuck, use the following configuration as a workaround. This configuration might result in up to 5X increase in total connections.
- ```
- KinesisAsyncClient kinesisClient = KinesisAsyncClient.builder()
- .region(region)
- .httpClientBuilder(NettyNioAsyncHttpClient.builder().maxConcurrency(Integer.MAX_VALUE).http2Configuration(Http2Configuration.builder().maxStreams(1).build()))
- .build()
- ```
-* Making ShardConsumerTest resilient to race conditions.
- * [PR#668](https://github.com/awslabs/amazon-kinesis-client/pull/668)
-* Updating integration test naming.
- * [PR#667](https://github.com/awslabs/amazon-kinesis-client/pull/667)
-
-### Release 2.2.7 (December 2, 2019)
-[Milestone#45](https://github.com/awslabs/amazon-kinesis-client/milestone/45)
-* Updating the AWS SDK version to 2.10.25
- * [PR#657](https://github.com/awslabs/amazon-kinesis-client/pull/657)
-* Adding a configurable DynamoDB billing mode
- * [PR#582](https://github.com/awslabs/amazon-kinesis-client/pull/582)
- * NOTE: Billing mode is not available in all regions; if your lease table cannot be created, use the following configuration as a workaround:
- ```
- LeaseManagementConfig leaseManagementConfig = builder.leaseManagementConfig().billingMode(null).build();
- ```
-
-
-### Release 2.2.6 (November 7, 2019)
-[Milestone#43](https://github.com/awslabs/amazon-kinesis-client/milestone/43)
-* Updating the SDK version to 2.9.25.
- * [PR#638](https://github.com/awslabs/amazon-kinesis-client/pull/638)
-* Clearing the local cache on a subscription termination, to avoid noisy logs on new subscriptions.
- * [PR#642](https://github.com/awslabs/amazon-kinesis-client/pull/642)
-* Updating the SDK version to 2.10.0 in order to fix the premature H2 stream close issue.
- * [PR#649](https://github.com/awslabs/amazon-kinesis-client/pull/649)
- * NOTE: SDK has a known connection teardown issue when multiple H2 streams are used within a connection. This might result in shard consumers sticking to a stale service host and not progressing. If your shard consumer gets stuck, use the following configuration as a workaround. This configuration might result in up to 5X increase in total connections.
- ```
- KinesisAsyncClient kinesisClient = KinesisAsyncClient.builder()
- .region(region)
- .httpClientBuilder(NettyNioAsyncHttpClient.builder().maxConcurrency(Integer.MAX_VALUE).maxHttp2Streams(1))
- .build()
- ```
-
-### Release 2.2.5 (October 23, 2019)
-
-[Milestone#40](https://github.com/awslabs/amazon-kinesis-client/milestone/40)
-* Updating Sonatype to dedicated AWS endpoint.
- * [PR#619](https://github.com/awslabs/amazon-kinesis-client/pull/619)
-* Introducing a validation step to verify if ShardEnd is reached, to prevent shard consumer stuck scenarios in the event of malformed response from service.
- * [PR#624](https://github.com/awslabs/amazon-kinesis-client/pull/624)
-
-### Release 2.2.4 (September 23, 2019)
-
-[Milestone#39](https://github.com/awslabs/amazon-kinesis-client/milestone/39)
-* Making FanoutRecordsPublisher test cases resilient to delayed thread operations
- * [PR#612](https://github.com/awslabs/amazon-kinesis-client/pull/612)
-* Drain delivery queue in the FanoutRecordsPublisher to make slow consumers consume events at their pace
- * [PR#607](https://github.com/awslabs/amazon-kinesis-client/pull/607)
-* Fix to prevent the onNext event going to stale subscription when restart happens in PrefetchRecordsPublisher
- * [PR#606](https://github.com/awslabs/amazon-kinesis-client/pull/606)
-
-### Release 2.2.3 (September 04, 2019)
-
-[Milestone#38](https://github.com/awslabs/amazon-kinesis-client/milestone/38)
-* Fix to prevent data loss and stuck shards in the event of failed records delivery in Polling readers
- * [PR#603](https://github.com/awslabs/amazon-kinesis-client/pull/603)
-
-### Release 2.2.2 (August 19, 2019)
-
-[Milestone#36](https://github.com/awslabs/amazon-kinesis-client/milestone/36)
-* Fix to prevent invalid ShardConsumer state transitions due to rejected executor service executions.
- * [PR#560](https://github.com/awslabs/amazon-kinesis-client/pull/560)
-* Fixing a bug in which initial subscription failure caused a shard consumer to get stuck.
- * [PR#562](https://github.com/awslabs/amazon-kinesis-client/pull/562)
-* Making CW publish failures visible by executing the async publish calls in a blocking manner and logging on exception.
- * [PR#584](https://github.com/awslabs/amazon-kinesis-client/pull/584)
-* Update shard end checkpoint failure messaging.
- * [PR#591](https://github.com/awslabs/amazon-kinesis-client/pull/591)
-* A fix for resiliency and durability issues that occur in the reduced thread mode - Nonblocking approach.
- * [PR#573](https://github.com/awslabs/amazon-kinesis-client/pull/573)
-* Preventing duplicate delivery due to unacknowledged event, while completing the subscription.
- * [PR#596](https://github.com/awslabs/amazon-kinesis-client/pull/596)
-
-### Release 2.2.1 (July 1, 2019)
-[Milestone#32](https://github.com/awslabs/amazon-kinesis-client/milestone/32)
-* Add periodic logging for the state of the thread pool executor service. This service executes the async tasks submitted to and by the ShardConsumer.
-* Add logging of failures from RxJava layer.
- * [PR#559](https://github.com/awslabs/amazon-kinesis-client/pull/559)
-
-### Release 2.2.0 (April 8, 2019)
-[Milestone#31](https://github.com/awslabs/amazon-kinesis-client/milestone/31)
-* Updated License to [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)
- * [PR#523](https://github.com/awslabs/amazon-kinesis-client/pull/523)
-* Introducing configuration for suppressing logs from ReadTimeoutExceptions caused while calling SubscribeToShard.
-Suppression can be configured by setting `LifecycleConfig#readTimeoutsToIgnoreBeforeWarning(Count)`.
- * [PR#528](https://github.com/awslabs/amazon-kinesis-client/issues/528)
-
-### Release 2.1.3 (March 18, 2019)
-[Milestone#30](https://github.com/awslabs/amazon-kinesis-client/milestone/30)
-* Added a message to recommend using `KinesisClientUtil` when an acquire timeout occurs in the `FanOutRecordsPublisher`.
- * [PR#514](https://github.com/awslabs/amazon-kinesis-client/pull/514)
-* Added a sleep between retries while waiting for a newly created stream consumer to become active.
- * [PR#506](https://github.com/awslabs/amazon-kinesis-client/issues/506)
-* Added timeouts on all futures returned from the DynamoDB and Kinesis clients.
- The timeouts can be configured by setting `LeaseManagementConfig#requestTimeout(Duration)` for DynamoDB, and `PollingConfig#kinesisRequestTimeout(Duration)` for Kinesis.
- * [PR#518](https://github.com/awslabs/amazon-kinesis-client/pull/518)
-* Upgraded to SDK version 2.5.10.
- * [PR#518](https://github.com/awslabs/amazon-kinesis-client/pull/518)
-* Artifacts for the Amazon Kinesis Client for Java are now signed by a new GPG key:
- ```
- pub 4096R/86368934 2019-02-14 [expires: 2020-02-14]
- uid Amazon Kinesis Tools
- ```
-
-### Release 2.1.2 (February 18, 2019)
-[Milestone#29](https://github.com/awslabs/amazon-kinesis-client/milestone/29)
-* Fixed handling of the progress detection in the `ShardConsumer` to restart from the last accepted record, instead of the last queued record.
- * [PR#492](https://github.com/awslabs/amazon-kinesis-client/pull/492)
-* Fixed handling of exceptions when using polling so that it will no longer treat `SdkException`s as an unexpected exception.
- * [PR#497](https://github.com/awslabs/amazon-kinesis-client/pull/497)
- * [PR#502](https://github.com/awslabs/amazon-kinesis-client/pull/502)
-* Fixed a case where lease loss would block the `Scheduler` while waiting for a record processor's `processRecords` method to complete.
- * [PR#501](https://github.com/awslabs/amazon-kinesis-client/pull/501)
-
-### Release 2.1.1 (February 6, 2019)
-[Milestone#28](https://github.com/awslabs/amazon-kinesis-client/milestone/28)
-* Introducing `SHUT_DOWN_STARTED` state for the `WorkerStateChangeListener`.
- * [PR#457](https://github.com/awslabs/amazon-kinesis-client/pull/457)
-* Fixed a bug with `AWSSessionCredentials` using `AWSSecretID` instead of `AWSAccessID` and vice versa.
- * [PR#486](https://github.com/awslabs/amazon-kinesis-client/pull/486)
-* Upgrading SDK version to 2.4.0, which includes a fix for a possible deadlock when using Enhanced Fan-Out.
- * [PR#493](https://github.com/awslabs/amazon-kinesis-client/pull/493)
-
-### Release 2.1.0 (January 14, 2019)
-[Milestone #27](https://github.com/awslabs/amazon-kinesis-client/milestone/27)
-* Introducing MultiLangDaemon support for Enhanced Fan-Out.
-* MultiLangDaemon now supports the following command line options.
- * `--properties-file`: Properties file that the KCL should use to set up the Scheduler.
- * `--log-configuration`: logback.xml that the KCL should use for logging.
-* Updated AWS SDK dependency to 2.2.0.
-* MultiLangDaemon now uses logback for logging.
-
-### Release 2.0.5 (November 12, 2018)
-[Milestone #26](https://github.com/awslabs/amazon-kinesis-client/milestone/26?closed=1)
-* Fixed a deadlock condition that could occur when using the polling model.
- When using the `PollingConfig` and a slower record processor it was possible to hit a deadlock in the retrieval of records.
- * [PR #462](https://github.com/awslabs/amazon-kinesis-client/pull/462)
- * [Issue #448](https://github.com/awslabs/amazon-kinesis-client/issues/448)
-* Adjusted `RetrievalConfig`, and `FanOutConfig` to use accessors instead of direct member access.
- * [PR #453](https://github.com/awslabs/amazon-kinesis-client/pull/453)
-
-
-### Release 2.0.4 (October 18, 2018)
-[Milestone #25](https://github.com/awslabs/amazon-kinesis-client/milestone/25)
-* Added method to retrieve leases from the LeaseCoordinator and LeaseTaker.
- * [PR #428](https://github.com/awslabs/amazon-kinesis-client/pull/428)
-* Fixed a race condition shutting down the Scheduler before it has completed initialization.
- * [PR #439](https://github.com/awslabs/amazon-kinesis-client/pull/439)
- * [Issue #427](https://github.com/awslabs/amazon-kinesis-client/issues/427)
-* Added `HierarchicalShardSyncer` which replaces the static `ShardSyncer`.
- `HierarchicalShardSyncer` removes the contention between multiple instances of the Scheduler when running under a single JVM.
- * [PR #395](https://github.com/awslabs/amazon-kinesis-client/pull/395)
- * [Issue #415](https://github.com/awslabs/amazon-kinesis-client/issues/415)
-* Added `TaskExecutionListener` which allows monitoring of tasks being executed by the `ShardConsumer`.
- The listener is invoked before and after a task is executed by the `ShardConsumer`.
- * [PR #417](https://github.com/awslabs/amazon-kinesis-client/pull/417)
-
-### Release 2.0.3 (October 8, 2018)
-[Milestone #23](https://github.com/awslabs/amazon-kinesis-client/milestone/23)
-* Fixed an issue where the `KinesisAsyncClient` could be misconfigured to use HTTP 1.1.
- Using HTTP 1.1 with `SubscribeToShard` is unsupported, and could cause misdelivery of records to the record processor.
- * [Issue #391](https://github.com/awslabs/amazon-kinesis-client/issues/391)
- * [PR #434](https://github.com/awslabs/amazon-kinesis-client/pull/434)
- * [PR #433](https://github.com/awslabs/amazon-kinesis-client/pull/433)
-* Lower the severity of `ReadTimeout` exceptions.
- `ReadTimeout` exceptions can occur if the client is unable to request data from Kinesis for more than client timeout, which defaults to 30 seconds. This can occur if the record processor blocks for more than the timeout period. `ReadTimeout` could also occur as part of [Issue #391](https://github.com/awslabs/amazon-kinesis-client/issues/391).
- * [Issue #399](https://github.com/awslabs/amazon-kinesis-client/issues/399)
- * [PR #403](https://github.com/awslabs/amazon-kinesis-client/pull/403)
-* Added a callback that allows applications to take actions after DynamoDB table creation.
- Applications can now install a callback that is called after creating the DynamoDB table by implementing `TableCreatorCallback`.
- * [PR #413](https://github.com/awslabs/amazon-kinesis-client/pull/413)
-* Updated the guava dependency to 26.0-jre.
- * [PR #420](https://github.com/awslabs/amazon-kinesis-client/pull/420)
- * [Issue #416](https://github.com/awslabs/amazon-kinesis-client/issues/416)
-* Added some additional debug logging around the initialization of the `FanOutRecordsPublisher`.
- * [PR #398](https://github.com/awslabs/amazon-kinesis-client/pull/398)
-* Upgraded AWS SDK version to 2.0.6
- * [PR #434](https://github.com/awslabs/amazon-kinesis-client/pull/434)
-
-
-### Release 2.0.2 (September 4, 2018)
-[Milestone #22](https://github.com/awslabs/amazon-kinesis-client/milestone/22)
-* Fixed an issue where the a warning would be logged every second if `logWarningForTaskAfterMillis` was set.
- The logging for last time of data arrival now respects the value of `logWarningForTaskAfterMillis`.
- * [PR #383](https://github.com/awslabs/amazon-kinesis-client/pull/383)
- * [Issue #381](https://github.com/awslabs/amazon-kinesis-client/issues/381)
-* Moved creation of `WorkerStateChangedListener` and `GracefulShutdownCoordinator` to the `CoordinatorConfig`.
- Originally the `WorkerStateChangedListener` and `GracefulShutdownCoordinator` were created by methods on the `SchedulerCoordinatorFactory`, but they should have been configuration options.
- The original methods have been deprecated, and may be removed at a later date.
- * [PR #385](https://github.com/awslabs/amazon-kinesis-client/pull/385)
- * [PR #388](https://github.com/awslabs/amazon-kinesis-client/pull/388)
-* Removed dependency on Apache Commons Lang 2.6.
- The dependency on Apache Commons Lang 2.6 has removed, and all usages updated to use Apache Commons Lang 3.7.
- * [PR #386](https://github.com/awslabs/amazon-kinesis-client/pull/386)
- * [Issue #370](https://github.com/awslabs/amazon-kinesis-client/issues/370)
-* Fixed a typo in the MutliLang Daemon shutdown hook.
- * [PR #387](https://github.com/awslabs/amazon-kinesis-client/pull/387)
-* Added method `onAllInitializationAttemptsFailed(Throwable)` to `WorkerStateChangedListener` to report when all initialization attempts have failed.
- This method is a default method, and it isn't require to implement the method. This method is only called after all attempts to initialize the `Scheduler` have failed.
- * [PR #369](https://github.com/awslabs/amazon-kinesis-client/pull/369)
-
-### Release 2.0.1 (August 21, 2018)
-* Mark certain internal components with `@KinesisClientInternalApi` attribute.
- Components marked as internal may be deprecated at a faster rate than public components.
- * [PR #358](https://github.com/awslabs/amazon-kinesis-client/pull/358)
-* Fixed an issue where `ResourceNotFoundException` on subscription to a shard was not triggering end of shard handling.
- If a lease table contains a shard that is no longer present in the stream attempt to subscribe to that shard will trigger a `ResourceNotFoundException`. These exception are treated the same as reaching the end of a shard.
- * [PR #359](https://github.com/awslabs/amazon-kinesis-client/pull/359)
-* Fixed an issue where the KCL would not Use the configured DynamoDB IOPs when creating the lease table.
- * [PR #360](https://github.com/awslabs/amazon-kinesis-client/pull/360)
-* Make the maximum number of Scheduler initialization attempts configurable.
- The maximum number of `Scheduler` initialization attempts can be configured via `CoordinatorConfig#maxInitializationAttempts`.
- * [PR #363](https://github.com/awslabs/amazon-kinesis-client/pull/363)
- * [PR #368](https://github.com/awslabs/amazon-kinesis-client/pull/368)
-* Fixed an issue where it was possible to get a duplicate record when resubscribing to a shard.
- Subscribe to shard requires periodic resubscribing, and uses a new concept of a continuation sequence number. If the continuation sequence number was equal to the last record that record would be processed a second time. Resubscribing now uses `AFTER_SEQUENCE_NUMBER` to ensure that only later records are returned.
- * [PR #371](https://github.com/awslabs/amazon-kinesis-client/pull/371)
-* Upgraded to AWS SDK 2.0.1
- * [PR #372](https://github.com/awslabs/amazon-kinesis-client/pull/372)
-* Fixed an issue where time based restart of the subscription wasn't resetting the `lastRequestTime`.
- If a subscription hasn't delivered any data for more than 30 seconds it will be canceled and restarted. This detection is based of the `lastRequestTime` which wasn't getting reset after the restart was triggered.
- * [PR #373](https://github.com/awslabs/amazon-kinesis-client/pull/373)
-* Fixed an issue where requesting on the subscription from the `FanOutRecordsPublisher` could trigger an unexpected failure.
- Due to a race condition the underlying flow in the subscription could be set to something else. The method is now synchronized, and verifies that the subscriber it was created with is still the subscriber in affect.
- This issue generally would only appear when multiple errors were occurring while connecting to Kinesis.
- * [PR #374](https://github.com/awslabs/amazon-kinesis-client/pull/374)
-* Fixed an issue where the number of requested items could exceed the capacity of the RxJava queue.
- There was an off by one issue when determining whether to make a request to the SDK subscription. This changes the calculation to represent the capacity as a queue.
- * [PR #375](https://github.com/awslabs/amazon-kinesis-client/pull/375)
-
-### Release 2.0.0 (August 02, 2018)
-* The Maven `groupId`, along with the `version`, for the Amazon Kinesis Client has changed from `com.amazonaws` to `software.amazon.kinesis`.
- To add a dependency on the new version of the Amazon Kinesis Client:
- ``` xml
-
- software.amazon.kinesis
- amazon-kinesis-client
- 2.0.0
-
- ```
-* Added support for Enhanced Fan Out.
- Enhanced Fan Out provides for lower end to end latency, and increased number of consumers per stream.
- * Records are now delivered via streaming, reducing end-to-end latency.
- * The Amazon Kinesis Client will automatically register a new consumer if required.
- When registering a new consumer, the Kinesis Client will default to the application name unless configured otherwise.
- * `SubscribeToShard` maintains long lived connections with Kinesis, which in the AWS Java SDK 2.0 is limited by default.
- The `KinesisClientUtil` has been added to assist configuring the `maxConcurrency` of the `KinesisAsyncClient`.
- __WARNING: The Amazon Kinesis Client may see significantly increased latency, unless the `KinesisAsyncClient` is configured to have a `maxConcurrency` high enough to allow all leases plus additional usages of the `KinesisAsyncClient`.__
- * The Amazon Kinesis Client now uses 3 additional Kinesis API's:
- __WARNING: If using a restrictive Kinesis IAM policy you may need to add the following API methods to the policy.__
- * [`SubscribeToShard`](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_SubscribeToShard.html)
- * [`DescribeStreamSummary`](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_DescribeStreamSummary.html)
- * [`DescribeStreamConsumer`](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_DescribeStreamConsumer.html)
- * [`RegisterStreamConsumer`](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_RegisterStreamConsumer.html)
- * New configuration options are available to configure Enhanced Fan Out.
-
- | Name | Default | Description |
- |-----------------|---------|---------------------------------------------------------------------------------------------------------------------|
- | consumerArn | Unset | The ARN for an already created consumer. If this is set, the Kinesis Client will not attempt to create a consumer. |
- | streamName | Unset | The name of the stream that a consumer should be create for if necessary |
- | consumerName | Unset | The name of the consumer to create. If this is not set the applicationName will be used instead. |
- | applicationName | Unset | The name of the application. This is used as the name of the consumer unless consumerName is set. |
-
-* Modular Configuration of the Kinesis Client
- The Kinesis Client has migrated to a modular configuration system, and the `KinesisClientLibConfiguration` class has been removed.
- Configuration has been split into 7 classes. Default versions of the configuration can be created from the `ConfigsBuilder`.
- Please [see the migration guide for more information][migration-guide].
- * `CheckpointConfig`
- * `CoordinatorConfig`
- * `LeaseManagementConfig`
- * `LifecycleConfig`
- * `MetricsConfig`
- * `ProcessorConfig`
- * `RetrievalConfig`
-
-* Upgraded to AWS Java SDK 2.0
- The Kinesis Client now uses the AWS Java SDK 2.0. The dependency on AWS Java SDK 1.11 has been removed.
- All configurations will only accept 2.0 clients.
- * When configuring the `KinesisAsyncClient` the `KinesisClientUtil#createKinesisAsyncClient` can be used to configure the Kinesis Client
- * __If you need support for AWS Java SDK 1.11 you will need to add a direct dependency.__
- __When adding a dependency you must ensure that the 1.11 versions of Jackson dependencies are excluded__
- [Please see the migration guide for more information][migration-guide]
-
-* MultiLangDaemon is now a separate module
- The MultiLangDaemon has been separated to its own Maven module and is no longer available in `amazon-kinesis-client`. To include the MultiLangDaemon, add a dependency on `amazon-kinesis-client-multilang`.
-
-[kinesis]: http://aws.amazon.com/kinesis
-[kinesis-forum]: http://developer.amazonwebservices.com/connect/forum.jspa?forumID=169
-[kinesis-client-library-issues]: https://github.com/awslabs/amazon-kinesis-client/issues
-[docs-signup]: http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/java-dg-setup.html
-[kinesis-guide]: http://docs.aws.amazon.com/kinesis/latest/dev/introduction.html
-[kinesis-guide-begin]: http://docs.aws.amazon.com/kinesis/latest/dev/before-you-begin.html
-[kinesis-guide-create]: http://docs.aws.amazon.com/kinesis/latest/dev/step-one-create-stream.html
-[kinesis-guide-applications]: http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-app.html
-[kinesis-guide-monitoring-with-kcl]: http://docs.aws.amazon.com//kinesis/latest/dev/monitoring-with-kcl.html
-[kinesis-guide-kpl]: http://docs.aws.amazon.com//kinesis/latest/dev/developing-producers-with-kpl.html
-[kinesis-guide-consumer-deaggregation]: http://docs.aws.amazon.com//kinesis/latest/dev/kinesis-kpl-consumer-deaggregation.html
-[kclpy]: https://github.com/awslabs/amazon-kinesis-client-python
-[multi-lang-protocol]: https://github.com/awslabs/amazon-kinesis-client/blob/master/src/main/java/com/amazonaws/services/kinesis/multilang/package-info.java
+---
+### Release 3.0.0 (November 06, 2024)
+* New lease assignment / load balancing algorithm
+ * KCL 3.x introduces a new lease assignment and load balancing algorithm. It assigns leases among workers based on worker utilization metrics and throughput on each lease, replacing the previous lease count-based lease assignment algorithm.
+ * When KCL detects higher variance in CPU utilization among workers, it proactively reassigns leases from over-utilized workers to under-utilized workers for even load balancing. This ensures even CPU utilization across workers and removes the need to over-provision the stream processing compute hosts.
+* Optimized DynamoDB RCU usage
+ * KCL 3.x optimizes DynamoDB read capacity unit (RCU) usage on the lease table by implementing a global secondary index with leaseOwner as the partition key. This index mirrors the leaseKey attribute from the base lease table, allowing workers to efficiently discover their assigned leases by querying the index instead of scanning the entire table.
+ * This approach significantly reduces read operations compared to earlier KCL versions, where workers performed full table scans, resulting in higher RCU consumption.
+* Graceful lease handoff
+ * KCL 3.x introduces a feature called "graceful lease handoff" to minimize data reprocessing during lease reassignments. Graceful lease handoff allows the current worker to complete checkpointing of processed records before transferring the lease to another worker. For graceful lease handoff, you should implement checkpointing logic within the existing `shutdownRequested()` method.
+ * This feature is enabled by default in KCL 3.x, but you can turn off this feature by adjusting the configuration property `isGracefulLeaseHandoffEnabled`.
+ * While this approach significantly reduces the probability of data reprocessing during lease transfers, it doesn't completely eliminate the possibility. To maintain data integrity and consistency, it's crucial to design your downstream consumer applications to be idempotent. This ensures that the application can handle potential duplicate record processing without adverse effects.
+* New DynamoDB metadata management artifacts
+ * KCL 3.x introduces two new DynamoDB tables for improved lease management:
+ * Worker metrics table: Records CPU utilization metrics from each worker. KCL uses these metrics for optimal lease assignments, balancing resource utilization across workers. If the CPU utilization metric is not available, KCL assigns leases to balance the total sum of shard throughput per worker instead.
+ * Coordinator state table: Stores internal state information for workers. Used to coordinate in-place migration from KCL 2.x to KCL 3.x and leader election among workers.
+ * Follow this [documentation](https://docs.aws.amazon.com/streams/latest/dev/kcl-migration-from-2-3.html#kcl-migration-from-2-3-IAM-permissions) to add required IAM permissions for your KCL application.
+* Other improvements and changes
+ * Dependency on the AWS SDK for Java 1.x has been fully removed.
+ * The Glue Schema Registry integration functionality no longer depends on AWS SDK for Java 1.x. Previously, it required this as a transitive dependency.
+ * MultiLangDaemon has been upgraded to use AWS SDK for Java 2.x. It no longer depends on AWS SDK for Java 1.x.
+ * `idleTimeBetweenReadsInMillis` (PollingConfig) now has a minimum default value of 200.
+ * This polling configuration property determines the [publisher's](https://github.com/awslabs/amazon-kinesis-client/blob/master/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisher.java) wait time between GetRecords calls in both success and failure cases. Previously, setting this value below 200 caused unnecessary throttling. This is because Amazon Kinesis Data Streams supports up to five read transactions per second per shard for shared-throughput consumers.
+ * Shard lifecycle management is improved to deal with edge cases around shard splits and merges to ensure records continue being processed as expected.
+* Migration
+ * The programming interfaces of KCL 3.x remain identical to those of KCL 2.x for an easier migration, with the exception of those applications that do not use the recommended approach of using the Config Builder. These applications will have to refer to [the troubleshooting guide](https://docs.aws.amazon.com/streams/latest/dev/troubleshooting-consumers.html#compiliation-error-leasemanagementconfig). For detailed migration instructions, please refer to the [Migrate consumers from KCL 2.x to KCL 3.x](https://docs.aws.amazon.com/streams/latest/dev/kcl-migration-from-2-3.html) page in the Amazon Kinesis Data Streams developer guide.
+* Configuration properties
+ * New configuration properties introduced in KCL 3.x are listed in this [doc](https://github.com/awslabs/amazon-kinesis-client/blob/master/docs/kcl-configurations.md#new-configurations-in-kcl-3x).
+ * Deprecated configuration properties in KCL 3.x are listed in this [doc](https://github.com/awslabs/amazon-kinesis-client/blob/master/docs/kcl-configurations.md#discontinued-configuration-properties-in-kcl-3x). You need to keep the deprecated configuration properties during the migration from any previous KCL version to KCL 3.x.
+* Metrics
+ * New CloudWatch metrics introduced in KCL 3.x are explained in the [Monitor the Kinesis Client Library with Amazon CloudWatch](https://docs.aws.amazon.com/streams/latest/dev/monitoring-with-kcl.html) page in the Amazon Kinesis Data Streams developer guide. The following operations are newly added in KCL 3.x:
+ * `LeaseAssignmentManager`
+ * `WorkerMetricStatsReporter`
+ * `LeaseDiscovery`
diff --git a/README.md b/README.md
index 6737c4b1f..284e71b65 100644
--- a/README.md
+++ b/README.md
@@ -1,41 +1,38 @@
# Amazon Kinesis Client Library for Java
[![Build Status](https://travis-ci.org/awslabs/amazon-kinesis-client.svg?branch=master)](https://travis-ci.org/awslabs/amazon-kinesis-client)
-The **Amazon Kinesis Client Library for Java** (Amazon KCL) enables Java developers to easily consume and process data from [Amazon Kinesis][kinesis].
+The **Amazon Kinesis Client Library (KCL) for Java** enables Java developers to easily consume and process data from [Amazon Kinesis Data Streams][kinesis].
-* [Kinesis Product Page][kinesis]
-* [Forum][kinesis-forum]
+* [Kinesis Data Streams Product Page][kinesis]
+* [Amazon re:Post Forum: Kinesis][kinesis-forum]
* [Javadoc][kcl-javadoc]
* [FAQ](docs/FAQ.md)
-* [KCL Documentation](docs/) (folder)
+* [Developer Guide - Kinesis Client Library][kcl-aws-doc]
+* [KCL GitHub documentation](docs/) (folder)
* [Issues][kinesis-client-library-issues]
-
-### Recommended Upgrade for All Users of the 1.x Amazon Kinesis Client
-:warning: We recommend customers to migrate to 1.14.1 or newer to avoid [known bugs](https://github.com/awslabs/amazon-kinesis-client/issues/778) in 1.14.0 version
-
-### Recommended Upgrade for All Users of the 2.x Amazon Kinesis Client
-**:warning: It's highly recommended for users of version 2.0 of the Amazon Kinesis Client to upgrade to version 2.0.3 or later. A [bug has been](https://github.com/awslabs/amazon-kinesis-client/issues/391) identified in versions prior to 2.0.3 that could cause records to be delivered to the wrong record processor.**
-
-**:information_source: Amazon Kinesis Client versions 1.x are not impacted.**
-
-Please open an issue if you have any questions.
+* [Giving Feedback][giving-feedback]
## Features
-* Provides an easy-to-use programming model for processing data using Amazon Kinesis
-* Helps with scale-out and fault-tolerant processing
+* **Scalability:** KCL enables applications to scale dynamically by distributing the processing load across multiple workers. You can scale your application in or out, manually or with auto-scaling, without worrying about load redistribution.
+* **Load balancing:** KCL automatically balances the processing load across available workers, resulting in an even distribution of work across workers.
+* **Checkpointing:** KCL manages checkpointing of processed records, enabling applications to resume processing from their last successfully processed position.
+* **Fault tolerance:** KCL provides built-in fault tolerance mechanisms, making sure that data processing continues even if individual workers fail. KCL also provides at-least-once delivery.
+* **Handling stream-level changes:** KCL adapts to shard splits and merges that might occur due to changes in data volume. It maintains ordering by making sure that child shards are processed only after their parent shard is completed and checkpointed.
+* **Monitoring:** KCL integrates with Amazon CloudWatch for consumer-level monitoring.
+* **Multi-language support:** KCL natively supports Java and enables multiple non-Java programming languages through MultiLangDaemon.
## Getting Started
1. **Sign up for AWS** — Before you begin, you need an AWS account. For more information about creating an AWS account and retrieving your AWS credentials, see [AWS Account and Credentials][docs-signup] in the AWS SDK for Java Developer Guide.
-1. **Sign up for Amazon Kinesis** — Go to the Amazon Kinesis console to sign up for the service and create an Amazon Kinesis stream. For more information, see [Create an Amazon Kinesis Stream][kinesis-guide-create] in the Amazon Kinesis Developer Guide.
-1. **Minimum requirements** — To use the Amazon Kinesis Client Library, you'll need **Java 1.8+**. For more information about Amazon Kinesis Client Library requirements, see [Before You Begin][kinesis-guide-begin] in the Amazon Kinesis Developer Guide.
-1. **Using the Amazon Kinesis Client Library** — The best way to get familiar with the Amazon Kinesis Client Library is to read [Developing Record Consumer Applications][kinesis-guide-applications] in the Amazon Kinesis Developer Guide.
+2. **Sign up for Amazon Kinesis** — Go to the Amazon Kinesis console to sign up for the service and create an Amazon Kinesis stream. For more information, see [Create an Amazon Kinesis Stream][kinesis-guide-create] in the Amazon Kinesis Developer Guide.
+3. **Minimum requirements** — To use the Amazon Kinesis Client Library, you will need **Java 1.8+**. For more information about Amazon Kinesis Client Library requirements, see [Before You Begin][kinesis-guide-begin] in the Amazon Kinesis Developer Guide.
+4. **Using the Amazon Kinesis Client Library** — The best way to get familiar with the Amazon Kinesis Client Library is to read [Use Kinesis Client Library][kinesis-guide-applications] in the Amazon Kinesis Data Streams Developer Guide. For more information on core KCL concepts, please refer to the [KCL Concepts][kinesis-client-library-concepts] page.
## Building from Source
-After you've downloaded the code from GitHub, you can build it using Maven. To disable GPG signing in the build, use
-this command: `mvn clean install -Dgpg.skip=true`.
+After you have downloaded the code from GitHub, you can build it using Maven. To disable GPG signing in the build, use
+this command: `mvn clean install -Dgpg.skip=true`.
Note: This command does not run integration tests.
To disable running unit tests in the build, add the property `-Dskip.ut=true`.
@@ -58,7 +55,17 @@ To make it easier for developers to write record processors in other languages,
## Using the KCL
The recommended way to use the KCL for Java is to consume it from Maven.
+### Version 3.x
+ ``` xml
+
+ software.amazon.kinesis
+ amazon-kinesis-client
+ 3.0.0
+
+ ```
+
### Version 2.x
+[Version 2.x tracking branch](https://github.com/awslabs/amazon-kinesis-client/tree/v2.x)
``` xml
software.amazon.kinesis
@@ -77,13 +84,36 @@ The recommended way to use the KCL for Java is to consume it from Maven.
```
+> **IMPORTANT**
+> We recommend using the latest KCL version for improved performance and support.
+
## Release Notes
| KCL Version | Changelog |
| --- | --- |
-| 2.x | [master/CHANGELOG.md](CHANGELOG.md) |
+| 3.x | [master/CHANGELOG.md](CHANGELOG.md) |
+| 2.x | [v2.x/CHANGELOG.md](https://github.com/awslabs/amazon-kinesis-client/blob/v2.x/CHANGELOG.md) |
| 1.x | [v1.x/CHANGELOG.md](https://github.com/awslabs/amazon-kinesis-client/blob/v1.x/CHANGELOG.md) |
+## Notices
+
+### Recommended Upgrade for All Users of the 1.x Amazon Kinesis Client
+We recommend that customers migrate to version 1.14.1 or newer to avoid [known bugs](https://github.com/awslabs/amazon-kinesis-client/issues/778) in version 1.14.0.
+
+### Recommended Upgrade for All Users of the 2.x Amazon Kinesis Client
+We highly recommend that users of version 2.0 of the Amazon Kinesis Client upgrade to version 2.0.3 or later. A [bug](https://github.com/awslabs/amazon-kinesis-client/issues/391) has been identified in versions prior to 2.0.3 that could cause records to be delivered to the wrong record processor.
+
+## Giving Feedback
+
+Help Us Improve the Kinesis Client Library! Your involvement is crucial to enhancing the Kinesis Client Library. We invite you to join our community and contribute in the following ways:
+
+* [Issue](https://github.com/awslabs/amazon-kinesis-client/issues) Reporting: This is our preferred method of communication. Use this channel to report bugs, suggest improvements, or ask questions.
+* Feature Requests: Share your ideas for new features or vote for existing proposals on our [Issues](https://github.com/awslabs/amazon-kinesis-client/issues) page. This helps us prioritize development efforts.
+* Participate in Discussions: Engage with other users and our team in our discussion forums.
+* Submit [Pull Requests](https://github.com/awslabs/amazon-kinesis-client/pulls): If you have developed a fix or improvement, we welcome your code contributions.
+
+By participating through these channels, you play a vital role in shaping the future of the Kinesis Client Library. We value your input and look forward to collaborating with you!
+
[docs-signup]: http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/java-dg-setup.html
[kcl-javadoc]: https://javadoc.io/doc/software.amazon.kinesis/amazon-kinesis-client/
[kinesis]: http://aws.amazon.com/kinesis
@@ -98,4 +128,7 @@ The recommended way to use the KCL for Java is to consume it from Maven.
[kinesis-guide-consumer-deaggregation]: http://docs.aws.amazon.com//kinesis/latest/dev/kinesis-kpl-consumer-deaggregation.html
[kclpy]: https://github.com/awslabs/amazon-kinesis-client-python
[multi-lang-protocol]: /amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/package-info.java
-[migration-guide]: https://docs.aws.amazon.com/streams/latest/dev/kcl-migration.html
+[migration-guide]: https://docs.aws.amazon.com/streams/latest/dev/kcl-migration-from-previous-versions
+[kcl-sample]: https://docs.aws.amazon.com/streams/latest/dev/kcl-example-code
+[kcl-aws-doc]: https://docs.aws.amazon.com/streams/latest/dev/kcl.html
+[giving-feedback]: https://github.com/awslabs/amazon-kinesis-client?tab=readme-ov-file#giving-feedback
\ No newline at end of file
diff --git a/amazon-kinesis-client-multilang/pom.xml b/amazon-kinesis-client-multilang/pom.xml
index c972b61c2..6fed93f75 100644
--- a/amazon-kinesis-client-multilang/pom.xml
+++ b/amazon-kinesis-client-multilang/pom.xml
@@ -21,16 +21,12 @@
amazon-kinesis-client-pom
software.amazon.kinesis
- 2.6.1-SNAPSHOT
+ 3.0.0
4.0.0
amazon-kinesis-client-multilang
-
- 1.12.668
-
-
software.amazon.kinesis
@@ -43,36 +39,10 @@
${awssdk.version}
-
- com.amazonaws
- aws-java-sdk-core
- ${aws-java-sdk.version}
-
-
- com.fasterxml.jackson.core
- jackson-databind
-
-
- com.fasterxml.jackson.dataformat
- jackson-dataformat-cbor
-
-
- org.apache.httpcomponents
- httpclient
-
-
-
-
-
- com.amazonaws
- aws-java-sdk-sts
- ${aws-java-sdk.version}
-
-
org.projectlombok
lombok
- 1.18.24
+ 1.18.28
provided
@@ -104,6 +74,12 @@
+
+ org.junit.jupiter
+ junit-jupiter-api
+ 5.11.3
+ test
+
junit
junit
@@ -122,6 +98,13 @@
1.3
test
+
+
+ org.mockito
+ mockito-junit-jupiter
+ 3.12.4
+ test
+
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/MultiLangDaemon.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/MultiLangDaemon.java
index 4588b2465..feb35ad49 100644
--- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/MultiLangDaemon.java
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/MultiLangDaemon.java
@@ -61,10 +61,10 @@
* applicationName = PythonKCLSample
*
* # Users can change the credentials provider the KCL will use to retrieve credentials.
- * # The DefaultAWSCredentialsProviderChain checks several other providers, which is
+ * # The DefaultCredentialsProvider checks several other providers, which is
* # described here:
- * # http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html
- * AWSCredentialsProvider = DefaultAWSCredentialsProviderChain
+ * # https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html
+ * AwsCredentialsProvider = DefaultCredentialsProvider
*
*/
@Slf4j
@@ -141,7 +141,7 @@ void configureLogging(
}
}
- String propertiesFile(final MultiLangDaemonArguments arguments) {
+ String validateAndGetPropertiesFileName(final MultiLangDaemonArguments arguments) {
String propertiesFile = "";
if (CollectionUtils.isNotEmpty(arguments.parameters)) {
@@ -216,9 +216,9 @@ public static void main(final String[] args) {
MultiLangDaemonArguments arguments = new MultiLangDaemonArguments();
JCommander jCommander = daemon.buildJCommanderAndParseArgs(arguments, args);
try {
- String propertiesFile = daemon.propertiesFile(arguments);
+ String propertiesFileName = daemon.validateAndGetPropertiesFileName(arguments);
daemon.configureLogging(arguments.logConfiguration);
- MultiLangDaemonConfig config = daemon.buildMultiLangDaemonConfig(propertiesFile);
+ MultiLangDaemonConfig config = daemon.buildMultiLangDaemonConfig(propertiesFileName);
Scheduler scheduler = daemon.buildScheduler(config);
MultiLangRunner runner = new MultiLangRunner(scheduler);
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyKey.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyKey.java
index 192118220..13acfeb15 100644
--- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyKey.java
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyKey.java
@@ -15,13 +15,14 @@
package software.amazon.kinesis.multilang;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
-import com.amazonaws.regions.Regions;
import com.google.common.base.CaseFormat;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.regions.Region;
/**
* Key-Value pairs which may be nested in, and extracted from, a property value
@@ -73,8 +74,13 @@ void visit(final NestedPropertyProcessor processor, final String endpoint) {
* @see Available Regions
*/
ENDPOINT_REGION {
- void visit(final NestedPropertyProcessor processor, final String region) {
- processor.acceptEndpointRegion(Regions.fromName(region));
+ void visit(final NestedPropertyProcessor processor, final String regionName) {
+ List validRegions = Region.regions();
+ Region region = Region.of(regionName);
+ if (!validRegions.contains(region)) {
+ throw new IllegalArgumentException("Invalid region name: " + regionName);
+ }
+ processor.acceptEndpointRegion(region);
}
},
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyProcessor.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyProcessor.java
index f7587297f..fee335143 100644
--- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyProcessor.java
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyProcessor.java
@@ -14,7 +14,7 @@
*/
package software.amazon.kinesis.multilang;
-import com.amazonaws.regions.Regions;
+import software.amazon.awssdk.regions.Region;
/**
* Defines methods to process {@link NestedPropertyKey}s.
@@ -26,11 +26,11 @@ public interface NestedPropertyProcessor {
*
* @param serviceEndpoint the service endpoint either with or without the protocol
* (e.g., https://sns.us-west-1.amazonaws.com, sns.us-west-1.amazonaws.com)
- * @param signingRegion the region to use for SigV4 signing of requests (e.g. us-west-1)
+ * @param signingRegion the region to use for the client (e.g. us-west-1)
*
- * @see #acceptEndpointRegion(Regions)
- * @see
- * AwsClientBuilder.EndpointConfiguration
+ * @see #acceptEndpointRegion(Region)
+ * @see
+ * AwsClientBuilder.endpointOverride
*/
void acceptEndpoint(String serviceEndpoint, String signingRegion);
@@ -42,7 +42,7 @@ public interface NestedPropertyProcessor {
*
* @see #acceptEndpoint(String, String)
*/
- void acceptEndpointRegion(Regions region);
+ void acceptEndpointRegion(Region region);
/**
* Set the external id, an optional field to designate who can assume an IAM role.
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProvider.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProvider.java
deleted file mode 100644
index b5b9f924c..000000000
--- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProvider.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright 2023 Amazon.com, Inc. or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package software.amazon.kinesis.multilang.auth;
-
-import java.util.Arrays;
-
-import com.amazonaws.auth.AWSSessionCredentials;
-import com.amazonaws.auth.AWSSessionCredentialsProvider;
-import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider;
-import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider.Builder;
-import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
-import com.amazonaws.regions.Regions;
-import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
-import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
-import software.amazon.kinesis.multilang.NestedPropertyKey;
-import software.amazon.kinesis.multilang.NestedPropertyProcessor;
-
-/**
- * An {@link AWSSessionCredentialsProvider} that is backed by STSAssumeRole.
- */
-public class KclSTSAssumeRoleSessionCredentialsProvider
- implements AWSSessionCredentialsProvider, NestedPropertyProcessor {
-
- private final Builder builder;
-
- private final STSAssumeRoleSessionCredentialsProvider provider;
-
- /**
- *
- * @param params vararg parameters which must include roleArn at index=0,
- * and roleSessionName at index=1
- */
- public KclSTSAssumeRoleSessionCredentialsProvider(final String[] params) {
- this(params[0], params[1], Arrays.copyOfRange(params, 2, params.length));
- }
-
- public KclSTSAssumeRoleSessionCredentialsProvider(
- final String roleArn, final String roleSessionName, final String... params) {
- builder = new Builder(roleArn, roleSessionName);
- NestedPropertyKey.parse(this, params);
- provider = builder.build();
- }
-
- @Override
- public AWSSessionCredentials getCredentials() {
- return provider.getCredentials();
- }
-
- @Override
- public void refresh() {
- // do nothing
- }
-
- @Override
- public void acceptEndpoint(final String serviceEndpoint, final String signingRegion) {
- final EndpointConfiguration endpoint = new EndpointConfiguration(serviceEndpoint, signingRegion);
- final AWSSecurityTokenService stsClient = AWSSecurityTokenServiceClient.builder()
- .withEndpointConfiguration(endpoint)
- .build();
- builder.withStsClient(stsClient);
- }
-
- @Override
- public void acceptEndpointRegion(final Regions region) {
- final AWSSecurityTokenService stsClient =
- AWSSecurityTokenServiceClient.builder().withRegion(region).build();
- builder.withStsClient(stsClient);
- }
-
- @Override
- public void acceptExternalId(final String externalId) {
- builder.withExternalId(externalId);
- }
-}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProvider.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProvider.java
new file mode 100644
index 000000000..96ecfdd68
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProvider.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.multilang.auth;
+
+import java.net.URI;
+import java.util.Arrays;
+
+import software.amazon.awssdk.auth.credentials.AwsCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.sts.StsClient;
+import software.amazon.awssdk.services.sts.StsClientBuilder;
+import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
+import software.amazon.awssdk.services.sts.model.AssumeRoleRequest;
+import software.amazon.awssdk.services.sts.model.AssumeRoleRequest.Builder;
+import software.amazon.kinesis.multilang.NestedPropertyKey;
+import software.amazon.kinesis.multilang.NestedPropertyProcessor;
+
+public class KclStsAssumeRoleCredentialsProvider implements AwsCredentialsProvider, NestedPropertyProcessor {
+ private final Builder assumeRoleRequestBuilder;
+ private final StsClientBuilder stsClientBuilder;
+ private final StsAssumeRoleCredentialsProvider stsAssumeRoleCredentialsProvider;
+
+ public KclStsAssumeRoleCredentialsProvider(String[] params) {
+ this(params[0], params[1], Arrays.copyOfRange(params, 2, params.length));
+ }
+
+ public KclStsAssumeRoleCredentialsProvider(String roleArn, String roleSessionName, String... params) {
+ this.assumeRoleRequestBuilder =
+ AssumeRoleRequest.builder().roleArn(roleArn).roleSessionName(roleSessionName);
+ this.stsClientBuilder = StsClient.builder();
+ NestedPropertyKey.parse(this, params);
+ this.stsAssumeRoleCredentialsProvider = StsAssumeRoleCredentialsProvider.builder()
+ .refreshRequest(assumeRoleRequestBuilder.build())
+ .asyncCredentialUpdateEnabled(true)
+ .stsClient(stsClientBuilder.build())
+ .build();
+ }
+
+ @Override
+ public AwsCredentials resolveCredentials() {
+ return stsAssumeRoleCredentialsProvider.resolveCredentials();
+ }
+
+ @Override
+ public void acceptEndpoint(String serviceEndpoint, String signingRegion) {
+ if (!serviceEndpoint.startsWith("http://") && !serviceEndpoint.startsWith("https://")) {
+ serviceEndpoint = "https://" + serviceEndpoint;
+ }
+ stsClientBuilder.endpointOverride(URI.create(serviceEndpoint));
+ stsClientBuilder.region(Region.of(signingRegion));
+ }
+
+ @Override
+ public void acceptEndpointRegion(Region region) {
+ stsClientBuilder.region(region);
+ }
+
+ @Override
+ public void acceptExternalId(String externalId) {
+ assumeRoleRequestBuilder.externalId(externalId);
+ }
+}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoder.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoder.java
deleted file mode 100644
index 8110d4f77..000000000
--- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoder.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright 2019 Amazon.com, Inc. or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package software.amazon.kinesis.multilang.config;
-
-import java.lang.reflect.InvocationTargetException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import com.amazonaws.auth.AWSCredentialsProvider;
-import com.amazonaws.auth.AWSCredentialsProviderChain;
-import lombok.extern.slf4j.Slf4j;
-
-/**
- * Get AWSCredentialsProvider property.
- */
-@Slf4j
-class AWSCredentialsProviderPropertyValueDecoder implements IPropertyValueDecoder {
- private static final String LIST_DELIMITER = ",";
- private static final String ARG_DELIMITER = "|";
-
- /**
- * Constructor.
- */
- AWSCredentialsProviderPropertyValueDecoder() {}
-
- /**
- * Get AWSCredentialsProvider property.
- *
- * @param value
- * property value as String
- * @return corresponding variable in correct type
- */
- @Override
- public AWSCredentialsProvider decodeValue(String value) {
- if (value != null) {
- List providerNames = getProviderNames(value);
- List providers = getValidCredentialsProviders(providerNames);
- AWSCredentialsProvider[] ps = new AWSCredentialsProvider[providers.size()];
- providers.toArray(ps);
- return new AWSCredentialsProviderChain(providers);
- } else {
- throw new IllegalArgumentException("Property AWSCredentialsProvider is missing.");
- }
- }
-
- /**
- * @return list of supported types
- */
- @Override
- public List> getSupportedTypes() {
- return Collections.singletonList(AWSCredentialsProvider.class);
- }
-
- /**
- * Convert string list to a list of valid credentials providers.
- */
- private static List getValidCredentialsProviders(List providerNames) {
- List credentialsProviders = new ArrayList<>();
-
- for (String providerName : providerNames) {
- final String[] nameAndArgs = providerName.split("\\" + ARG_DELIMITER);
- final Class extends AWSCredentialsProvider> clazz;
- try {
- final Class> c = Class.forName(nameAndArgs[0]);
- if (!AWSCredentialsProvider.class.isAssignableFrom(c)) {
- continue;
- }
- clazz = (Class extends AWSCredentialsProvider>) c;
- } catch (ClassNotFoundException cnfe) {
- // Providers are a product of prefixed Strings to cover multiple
- // namespaces (e.g., "Foo" -> { "some.auth.Foo", "kcl.auth.Foo" }).
- // It's expected that many class names will not resolve.
- continue;
- }
- log.info("Attempting to construct {}", clazz);
-
- AWSCredentialsProvider provider = null;
- if (nameAndArgs.length > 1) {
- final String[] varargs = Arrays.copyOfRange(nameAndArgs, 1, nameAndArgs.length);
-
- // attempt to invoke an explicit N-arg constructor of FooClass(String, String, ...)
- provider = constructProvider(providerName, () -> {
- Class>[] argTypes = new Class>[nameAndArgs.length - 1];
- Arrays.fill(argTypes, String.class);
- return clazz.getConstructor(argTypes).newInstance(varargs);
- });
-
- if (provider == null) {
- // attempt to invoke a public varargs/array constructor of FooClass(String[])
- provider = constructProvider(providerName, () -> clazz.getConstructor(String[].class)
- .newInstance((Object) varargs));
- }
- }
-
- if (provider == null) {
- // regardless of parameters, fallback to invoke a public no-arg constructor
- provider = constructProvider(providerName, clazz::newInstance);
- }
-
- if (provider != null) {
- credentialsProviders.add(provider);
- }
- }
- return credentialsProviders;
- }
-
- private static List getProviderNames(String property) {
- // assume list delimiter is ","
- String[] elements = property.split(LIST_DELIMITER);
- List result = new ArrayList<>();
- for (int i = 0; i < elements.length; i++) {
- String string = elements[i].trim();
- if (!string.isEmpty()) {
- // find all possible names and add them to name list
- result.addAll(getPossibleFullClassNames(string));
- }
- }
- return result;
- }
-
- private static List getPossibleFullClassNames(final String provider) {
- return Stream.of(
- // Customer provides a short name of common providers in com.amazonaws.auth package
- // (e.g., any classes implementing the AWSCredentialsProvider interface)
- // @see
- // http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/AWSCredentialsProvider.html
- "com.amazonaws.auth.",
-
- // Customer provides a short name of a provider offered by this multi-lang package
- "software.amazon.kinesis.multilang.auth.",
-
- // Customer provides a fully-qualified provider name, or a custom credentials provider
- // (e.g., com.amazonaws.auth.ClasspathFileCredentialsProvider, org.mycompany.FooProvider)
- "")
- .map(prefix -> prefix + provider)
- .collect(Collectors.toList());
- }
-
- @FunctionalInterface
- private interface CredentialsProviderConstructor {
- T construct()
- throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException;
- }
-
- /**
- * Attempts to construct an {@link AWSCredentialsProvider}.
- *
- * @param providerName Raw, unmodified provider name. Should there be an
- * Exeception during construction, this parameter will be logged.
- * @param constructor supplier-like function that will perform the construction
- * @return the constructed provider, if successful; otherwise, null
- *
- * @param type of the CredentialsProvider to construct
- */
- private static T constructProvider(
- final String providerName, final CredentialsProviderConstructor constructor) {
- try {
- return constructor.construct();
- } catch (NoSuchMethodException ignored) {
- // ignore
- } catch (IllegalAccessException | InstantiationException | InvocationTargetException | RuntimeException e) {
- log.warn("Failed to construct {}", providerName, e);
- }
- return null;
- }
-}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoder.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoder.java
new file mode 100644
index 000000000..fd29a3dba
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoder.java
@@ -0,0 +1,261 @@
+/*
+ * Copyright 2019 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.multilang.config;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain;
+import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
+import software.amazon.kinesis.multilang.auth.KclStsAssumeRoleCredentialsProvider;
+
+/**
+ * Get AwsCredentialsProvider property.
+ */
+@Slf4j
+class AwsCredentialsProviderPropertyValueDecoder implements IPropertyValueDecoder {
+ private static final String LIST_DELIMITER = ",";
+ private static final String ARG_DELIMITER = "|";
+
+ /**
+ * Constructor.
+ */
+ AwsCredentialsProviderPropertyValueDecoder() {}
+
+ /**
+ * Get AwsCredentialsProvider property.
+ *
+ * @param value
+ * property value as String
+ * @return corresponding variable in correct type
+ */
+ @Override
+ public AwsCredentialsProvider decodeValue(String value) {
+ if (value != null) {
+ List providerNames = getProviderNames(value);
+ List providers = getValidCredentialsProviders(providerNames);
+ AwsCredentialsProvider[] ps = new AwsCredentialsProvider[providers.size()];
+ providers.toArray(ps);
+ if (providers.isEmpty()) {
+ log.warn("Unable to construct any provider with name {}", value);
+ log.warn("Please verify that all AwsCredentialsProvider properties are passed correctly");
+ }
+ return AwsCredentialsProviderChain.builder()
+ .credentialsProviders(providers)
+ .build();
+ } else {
+ throw new IllegalArgumentException("Property AwsCredentialsProvider is missing.");
+ }
+ }
+
+ /**
+ * @return list of supported types
+ */
+ @Override
+ public List> getSupportedTypes() {
+ return Collections.singletonList(AwsCredentialsProvider.class);
+ }
+
+ /**
+ * Convert string list to a list of valid credentials providers.
+ */
+ private static List getValidCredentialsProviders(List providerNames) {
+ List credentialsProviders = new ArrayList<>();
+
+ for (String providerName : providerNames) {
+ final String[] nameAndArgs = providerName.split("\\" + ARG_DELIMITER);
+ final Class extends AwsCredentialsProvider> clazz = getClass(nameAndArgs[0]);
+ if (clazz == null) {
+ continue;
+ }
+ log.info("Attempting to construct {}", clazz);
+ final String[] varargs =
+ nameAndArgs.length > 1 ? Arrays.copyOfRange(nameAndArgs, 1, nameAndArgs.length) : new String[0];
+ AwsCredentialsProvider provider = tryConstructor(providerName, clazz, varargs);
+ if (provider == null) {
+ provider = tryCreate(providerName, clazz, varargs);
+ }
+ if (provider != null) {
+ log.info("Provider constructed successfully: {}", provider);
+ credentialsProviders.add(provider);
+ }
+ }
+ return credentialsProviders;
+ }
+
+ private static AwsCredentialsProvider tryConstructor(
+ String providerName, Class extends AwsCredentialsProvider> clazz, String[] varargs) {
+ AwsCredentialsProvider provider =
+ constructProvider(providerName, () -> getConstructorWithVarArgs(clazz, varargs));
+ if (provider == null) {
+ provider = constructProvider(providerName, () -> getConstructorWithArgs(clazz, varargs));
+ }
+ if (provider == null) {
+ provider = constructProvider(providerName, clazz::newInstance);
+ }
+ return provider;
+ }
+
+ private static AwsCredentialsProvider tryCreate(
+ String providerName, Class extends AwsCredentialsProvider> clazz, String[] varargs) {
+ AwsCredentialsProvider provider =
+ constructProvider(providerName, () -> getCreateMethod(clazz, (Object) varargs));
+ if (provider == null) {
+ provider = constructProvider(providerName, () -> getCreateMethod(clazz, varargs));
+ }
+ if (provider == null) {
+ provider = constructProvider(providerName, () -> getCreateMethod(clazz));
+ }
+ return provider;
+ }
+
+ private static AwsCredentialsProvider getConstructorWithVarArgs(
+ Class extends AwsCredentialsProvider> clazz, String[] varargs) {
+ try {
+ return clazz.getConstructor(String[].class).newInstance((Object) varargs);
+ } catch (Exception e) {
+ return null;
+ }
+ }
+
+ private static AwsCredentialsProvider getConstructorWithArgs(
+ Class extends AwsCredentialsProvider> clazz, String[] varargs) {
+ try {
+ Class>[] argTypes = new Class>[varargs.length];
+ Arrays.fill(argTypes, String.class);
+ return clazz.getConstructor(argTypes).newInstance((Object[]) varargs);
+ } catch (Exception e) {
+ return null;
+ }
+ }
+
+ private static AwsCredentialsProvider getCreateMethod(
+ Class extends AwsCredentialsProvider> clazz, Object... args) {
+ try {
+ Class>[] argTypes = new Class>[args.length];
+ for (int i = 0; i < args.length; i++) {
+ argTypes[i] = args[i].getClass();
+ }
+ Method createMethod = clazz.getDeclaredMethod("create", argTypes);
+ if (Modifier.isStatic(createMethod.getModifiers())) {
+ return clazz.cast(createMethod.invoke(null, args));
+ } else {
+ log.warn("Found non-static create() method in {}", clazz.getName());
+ }
+ } catch (NoSuchMethodException e) {
+ // No matching create method found for class
+ } catch (Exception e) {
+ log.warn("Failed to invoke create() method in {}", clazz.getName(), e);
+ }
+ return null;
+ }
+
+ /**
+ * Resolves the class for the given provider name.
+ *
+ * @param providerName A string containing the provider name.
+ *
+ * @return The Class object representing the resolved AwsCredentialsProvider implementation,
+ * or null if the class cannot be resolved or does not extend AwsCredentialsProvider.
+ */
+ private static Class extends AwsCredentialsProvider> getClass(String providerName) {
+ // Convert any form of StsAssumeRoleCredentialsProvider string to KclStsAssumeRoleCredentialsProvider
+ if (providerName.equals(StsAssumeRoleCredentialsProvider.class.getSimpleName())
+ || providerName.equals(StsAssumeRoleCredentialsProvider.class.getName())) {
+ providerName = KclStsAssumeRoleCredentialsProvider.class.getName();
+ }
+ try {
+ final Class> c = Class.forName(providerName);
+ if (!AwsCredentialsProvider.class.isAssignableFrom(c)) {
+ return null;
+ }
+ return (Class extends AwsCredentialsProvider>) c;
+ } catch (ClassNotFoundException cnfe) {
+ // Providers are a product of prefixed Strings to cover multiple
+ // namespaces (e.g., "Foo" -> { "some.auth.Foo", "kcl.auth.Foo" }).
+ // It's expected that many class names will not resolve.
+ return null;
+ }
+ }
+
+ private static List getProviderNames(String property) {
+ // assume list delimiter is ","
+ String[] elements = property.split(LIST_DELIMITER);
+ List result = new ArrayList<>();
+ for (int i = 0; i < elements.length; i++) {
+ String string = elements[i].trim();
+ if (!string.isEmpty()) {
+ // find all possible names and add them to name list
+ result.addAll(getPossibleFullClassNames(string));
+ }
+ }
+ return result;
+ }
+
+ private static List getPossibleFullClassNames(final String provider) {
+ return Stream.of(
+ // Customer provides a short name of a provider offered by this multi-lang package
+ "software.amazon.kinesis.multilang.auth.",
+ // Customer provides a short name of common providers in software.amazon.awssdk.auth.credentials
+ // package (e.g., any classes implementing the AwsCredentialsProvider interface)
+ // @see
+ // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/AwsCredentialsProvider.html
+ "software.amazon.awssdk.auth.credentials.",
+ // Customer provides a fully-qualified provider name, or a custom credentials provider
+ // (e.g., org.mycompany.FooProvider)
+ "")
+ .map(prefix -> prefix + provider)
+ .collect(Collectors.toList());
+ }
+
+ @FunctionalInterface
+ private interface CredentialsProviderConstructor {
+ T construct()
+ throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException;
+ }
+
+ /**
+ * Attempts to construct an {@link AwsCredentialsProvider}.
+ *
+ * @param providerName Raw, unmodified provider name. Should there be an
+ * Exception during construction, this parameter will be logged.
+ * @param constructor supplier-like function that will perform the construction
+ * @return the constructed provider, if successful; otherwise, null
+ *
+ * @param <T> type of the CredentialsProvider to construct
+ */
+ private static T constructProvider(
+ final String providerName, final CredentialsProviderConstructor constructor) {
+ try {
+ return constructor.construct();
+ } catch (NoSuchMethodException
+ | IllegalAccessException
+ | InstantiationException
+ | InvocationTargetException
+ | RuntimeException ignored) {
+ // ignore
+ }
+ return null;
+ }
+}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtils.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtils.java
index c6d588075..9337bb997 100644
--- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtils.java
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtils.java
@@ -93,7 +93,20 @@ public static void resolveFields(
try {
setter = b.configurationClass().getMethod(setterName, value.getClass());
} catch (NoSuchMethodException e) {
- throw new RuntimeException(e);
+ // find if there is a setter which is not the exact parameter type
+ // but is assignable from the type
+ for (Method method : b.configurationClass().getMethods()) {
+ Class>[] parameterTypes = method.getParameterTypes();
+ if (method.getName().equals(setterName)
+ && parameterTypes.length == 1
+ && parameterTypes[0].isAssignableFrom(value.getClass())) {
+ setter = method;
+ break;
+ }
+ }
+ if (setter == null) {
+ throw new RuntimeException(e);
+ }
}
}
try {
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/CoordinatorStateTableConfigBean.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/CoordinatorStateTableConfigBean.java
new file mode 100644
index 000000000..e4a7fe1fc
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/CoordinatorStateTableConfigBean.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.multilang.config;
+
+import lombok.Getter;
+import lombok.Setter;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig;
+import software.amazon.kinesis.multilang.config.converter.TagConverter.TagCollection;
+
+@Getter
+@Setter
+public class CoordinatorStateTableConfigBean {
+
+ interface CoordinatorStateConfigBeanDelegate {
+ String getCoordinatorStateTableName();
+
+ void setCoordinatorStateTableName(String value);
+
+ BillingMode getCoordinatorStateBillingMode();
+
+ void setCoordinatorStateBillingMode(BillingMode value);
+
+ long getCoordinatorStateReadCapacity();
+
+ void setCoordinatorStateReadCapacity(long value);
+
+ long getCoordinatorStateWriteCapacity();
+
+ void setCoordinatorStateWriteCapacity(long value);
+
+ Boolean getCoordinatorStatePointInTimeRecoveryEnabled();
+
+ void setCoordinatorStatePointInTimeRecoveryEnabled(Boolean value);
+
+ Boolean getCoordinatorStateDeletionProtectionEnabled();
+
+ void setCoordinatorStateDeletionProtectionEnabled(Boolean value);
+
+ TagCollection getCoordinatorStateTags();
+
+ void setCoordinatorStateTags(TagCollection value);
+ }
+
+ @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "tableName")
+ private String coordinatorStateTableName;
+
+ @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "billingMode")
+ private BillingMode coordinatorStateBillingMode;
+
+ @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "readCapacity")
+ private long coordinatorStateReadCapacity;
+
+ @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "writeCapacity")
+ private long coordinatorStateWriteCapacity;
+
+ @ConfigurationSettable(
+ configurationClass = CoordinatorStateTableConfig.class,
+ methodName = "pointInTimeRecoveryEnabled")
+ private Boolean coordinatorStatePointInTimeRecoveryEnabled;
+
+ @ConfigurationSettable(
+ configurationClass = CoordinatorStateTableConfig.class,
+ methodName = "deletionProtectionEnabled")
+ private Boolean coordinatorStateDeletionProtectionEnabled;
+
+ @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "tags")
+ private TagCollection coordinatorStateTags;
+}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/GracefulLeaseHandoffConfigBean.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/GracefulLeaseHandoffConfigBean.java
new file mode 100644
index 000000000..973279624
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/GracefulLeaseHandoffConfigBean.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.multilang.config;
+
+import lombok.Getter;
+import lombok.Setter;
+import software.amazon.kinesis.leases.LeaseManagementConfig;
+
+@Getter
+@Setter
+public class GracefulLeaseHandoffConfigBean {
+
+ interface GracefulLeaseHandoffConfigBeanDelegate {
+ Long getGracefulLeaseHandoffTimeoutMillis();
+
+ void setGracefulLeaseHandoffTimeoutMillis(Long value);
+
+ Boolean getIsGracefulLeaseHandoffEnabled();
+
+ void setIsGracefulLeaseHandoffEnabled(Boolean value);
+ }
+
+ @ConfigurationSettable(configurationClass = LeaseManagementConfig.GracefulLeaseHandoffConfig.class)
+ private Long gracefulLeaseHandoffTimeoutMillis;
+
+ @ConfigurationSettable(configurationClass = LeaseManagementConfig.GracefulLeaseHandoffConfig.class)
+ private Boolean isGracefulLeaseHandoffEnabled;
+}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfigurator.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfigurator.java
index 42b617a03..0d897efa3 100644
--- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfigurator.java
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfigurator.java
@@ -28,7 +28,7 @@
/**
* KinesisClientLibConfigurator constructs a KinesisClientLibConfiguration from java properties file. The following
- * three properties must be provided. 1) "applicationName" 2) "streamName" 3) "AWSCredentialsProvider"
+ * three properties must be provided. 1) "applicationName" 2) "streamName" 3) "AwsCredentialsProvider"
* KinesisClientLibConfigurator will help to automatically assign the value of "workerId" if this property is not
* provided. In the specified properties file, any properties, which matches the variable name in
* KinesisClientLibConfiguration and has a corresponding "with{variableName}" setter method, will be read in, and its
@@ -62,7 +62,7 @@ public MultiLangDaemonConfiguration getConfiguration(Properties properties) {
properties.entrySet().forEach(e -> {
try {
log.info("Processing (key={}, value={})", e.getKey(), e.getValue());
- utilsBean.setProperty(configuration, (String) e.getKey(), e.getValue());
+ utilsBean.setProperty(configuration, processKey((String) e.getKey()), e.getValue());
} catch (IllegalAccessException | InvocationTargetException ex) {
throw new RuntimeException(ex);
}
@@ -110,4 +110,17 @@ public MultiLangDaemonConfiguration getConfiguration(InputStream configStream) {
}
return getConfiguration(properties);
}
+
+ /**
+ * Normalizes any casing of a leading "AwsCredentialsProvider" in a configuration key (e.g. the legacy
+ * "AWSCredentialsProvider") to "awsCredentialsProvider" so it conforms to the autogenerated setters.
+ * @param key the config param key
+ * @return the case-normalized key; keys without that prefix are returned unchanged
+ */
+ String processKey(String key) {
+ if (key.toLowerCase(java.util.Locale.ROOT).startsWith("awscredentialsprovider")) { // ROOT: locale-safe
+ key = key.replaceAll("(?i)awscredentialsprovider", "awsCredentialsProvider");
+ }
+ return key;
+ }
}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfiguration.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfiguration.java
index 3336be887..33c07c986 100644
--- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfiguration.java
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfiguration.java
@@ -17,6 +17,7 @@
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
+import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
@@ -41,6 +42,7 @@
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient;
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
import software.amazon.awssdk.services.kinesis.KinesisAsyncClientBuilder;
import software.amazon.kinesis.checkpoint.CheckpointConfig;
@@ -55,7 +57,9 @@
import software.amazon.kinesis.lifecycle.LifecycleConfig;
import software.amazon.kinesis.metrics.MetricsConfig;
import software.amazon.kinesis.metrics.MetricsLevel;
-import software.amazon.kinesis.multilang.config.credentials.V2CredentialWrapper;
+import software.amazon.kinesis.multilang.config.converter.DurationConverter;
+import software.amazon.kinesis.multilang.config.converter.TagConverter;
+import software.amazon.kinesis.multilang.config.converter.TagConverter.TagCollection;
import software.amazon.kinesis.processor.ProcessorConfig;
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
import software.amazon.kinesis.retrieval.RetrievalConfig;
@@ -156,6 +160,9 @@ public void setInitialPositionInStream(InitialPositionInStream initialPositionIn
@ConfigurationSettable(configurationClass = CoordinatorConfig.class)
private long schedulerInitializationBackoffTimeMillis;
+ @ConfigurationSettable(configurationClass = CoordinatorConfig.class)
+ private CoordinatorConfig.ClientVersionConfig clientVersionConfig;
+
@ConfigurationSettable(configurationClass = LifecycleConfig.class)
private long taskBackoffTimeMillis;
@@ -189,6 +196,22 @@ public void setMetricsEnabledDimensions(String[] dimensions) {
@Delegate(types = PollingConfigBean.PollingConfigBeanDelegate.class)
private final PollingConfigBean pollingConfig = new PollingConfigBean();
+ @Delegate(types = GracefulLeaseHandoffConfigBean.GracefulLeaseHandoffConfigBeanDelegate.class)
+ private final GracefulLeaseHandoffConfigBean gracefulLeaseHandoffConfigBean = new GracefulLeaseHandoffConfigBean();
+
+ @Delegate(
+ types = WorkerUtilizationAwareAssignmentConfigBean.WorkerUtilizationAwareAssignmentConfigBeanDelegate.class)
+ private final WorkerUtilizationAwareAssignmentConfigBean workerUtilizationAwareAssignmentConfigBean =
+ new WorkerUtilizationAwareAssignmentConfigBean();
+
+ @Delegate(types = WorkerMetricStatsTableConfigBean.WorkerMetricsTableConfigBeanDelegate.class)
+ private final WorkerMetricStatsTableConfigBean workerMetricStatsTableConfigBean =
+ new WorkerMetricStatsTableConfigBean();
+
+ @Delegate(types = CoordinatorStateTableConfigBean.CoordinatorStateConfigBeanDelegate.class)
+ private final CoordinatorStateTableConfigBean coordinatorStateTableConfigBean =
+ new CoordinatorStateTableConfigBean();
+
private boolean validateSequenceNumberBeforeCheckpointing;
private long shutdownGraceMillis;
@@ -196,19 +219,19 @@ public void setMetricsEnabledDimensions(String[] dimensions) {
private final BuilderDynaBean kinesisCredentialsProvider;
- public void setAWSCredentialsProvider(String providerString) {
+ public void setAwsCredentialsProvider(String providerString) {
kinesisCredentialsProvider.set("", providerString);
}
private final BuilderDynaBean dynamoDBCredentialsProvider;
- public void setAWSCredentialsProviderDynamoDB(String providerString) {
+ public void setAwsCredentialsProviderDynamoDB(String providerString) {
dynamoDBCredentialsProvider.set("", providerString);
}
private final BuilderDynaBean cloudWatchCredentialsProvider;
- public void setAWSCredentialsProviderCloudWatch(String providerString) {
+ public void setAwsCredentialsProviderCloudWatch(String providerString) {
cloudWatchCredentialsProvider.set("", providerString);
}
@@ -252,6 +275,25 @@ public T convert(Class type, Object value) {
},
InitialPositionInStream.class);
+ convertUtilsBean.register(
+ new Converter() { // case-insensitive ClientVersionConfig lookup by enum constant name
+ @Override
+ public T convert(Class type, Object value) {
+ return type.cast(CoordinatorConfig.ClientVersionConfig.valueOf(
+ value.toString().toUpperCase(java.util.Locale.ROOT))); // ROOT: avoid locale-specific casing
+ }
+ },
+ CoordinatorConfig.ClientVersionConfig.class);
+
+ convertUtilsBean.register(
+ new Converter() { // case-insensitive BillingMode lookup; Locale.ROOT avoids locale-specific casing
+ @Override
+ public T convert(Class type, Object value) {
+ return type.cast(BillingMode.valueOf(value.toString().toUpperCase(java.util.Locale.ROOT)));
+ }
+ },
+ BillingMode.class);
+
convertUtilsBean.register(
new Converter() {
@Override
@@ -279,12 +321,15 @@ public T convert(final Class type, final Object value) {
},
Region.class);
+ convertUtilsBean.register(new DurationConverter(), Duration.class);
+ convertUtilsBean.register(new TagConverter(), TagCollection.class);
+
ArrayConverter arrayConverter = new ArrayConverter(String[].class, new StringConverter());
arrayConverter.setDelimiter(',');
convertUtilsBean.register(arrayConverter, String[].class);
- AWSCredentialsProviderPropertyValueDecoder oldCredentialsDecoder =
- new AWSCredentialsProviderPropertyValueDecoder();
- Function converter = s -> new V2CredentialWrapper(oldCredentialsDecoder.decodeValue(s));
+ AwsCredentialsProviderPropertyValueDecoder credentialsDecoder =
+ new AwsCredentialsProviderPropertyValueDecoder();
+ Function converter = credentialsDecoder::decodeValue;
this.kinesisCredentialsProvider = new BuilderDynaBean(
AwsCredentialsProvider.class, convertUtilsBean, converter, CREDENTIALS_DEFAULT_SEARCH_PATH);
@@ -370,6 +415,22 @@ private void handleRetrievalConfig(RetrievalConfig retrievalConfig, ConfigsBuild
retrievalMode.builder(this).build(configsBuilder.kinesisClient(), this));
}
+ private void handleCoordinatorConfig(CoordinatorConfig coordinatorConfig) {
+ ConfigurationSettableUtils.resolveFields(
+ this.coordinatorStateTableConfigBean, coordinatorConfig.coordinatorStateTableConfig());
+ }
+
+ private void handleLeaseManagementConfig(LeaseManagementConfig leaseManagementConfig) {
+ ConfigurationSettableUtils.resolveFields(
+ this.gracefulLeaseHandoffConfigBean, leaseManagementConfig.gracefulLeaseHandoffConfig());
+ ConfigurationSettableUtils.resolveFields(
+ this.workerUtilizationAwareAssignmentConfigBean,
+ leaseManagementConfig.workerUtilizationAwareAssignmentConfig());
+ ConfigurationSettableUtils.resolveFields(
+ this.workerMetricStatsTableConfigBean,
+ leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsTableConfig());
+ }
+
private Object adjustKinesisHttpConfiguration(Object builderObj) {
if (builderObj instanceof KinesisAsyncClientBuilder) {
KinesisAsyncClientBuilder builder = (KinesisAsyncClientBuilder) builderObj;
@@ -448,6 +509,8 @@ ResolvedConfiguration resolvedConfiguration(ShardRecordProcessorFactory shardRec
processorConfig,
retrievalConfig);
+ handleCoordinatorConfig(coordinatorConfig);
+ handleLeaseManagementConfig(leaseManagementConfig);
handleRetrievalConfig(retrievalConfig, configsBuilder);
resolveFields(configObjects, null, new HashSet<>(Arrays.asList(ConfigsBuilder.class, PollingConfig.class)));
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerMetricStatsTableConfigBean.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerMetricStatsTableConfigBean.java
new file mode 100644
index 000000000..d3e654b75
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerMetricStatsTableConfigBean.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.multilang.config;
+
+import lombok.Getter;
+import lombok.Setter;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig;
+import software.amazon.kinesis.multilang.config.converter.TagConverter.TagCollection;
+
+@Getter
+@Setter
+public class WorkerMetricStatsTableConfigBean {
+
+ interface WorkerMetricsTableConfigBeanDelegate {
+ String getWorkerMetricsTableName();
+
+ void setWorkerMetricsTableName(String value);
+
+ BillingMode getWorkerMetricsBillingMode();
+
+ void setWorkerMetricsBillingMode(BillingMode value);
+
+ long getWorkerMetricsReadCapacity();
+
+ void setWorkerMetricsReadCapacity(long value);
+
+ long getWorkerMetricsWriteCapacity();
+
+ void setWorkerMetricsWriteCapacity(long value);
+
+ Boolean getWorkerMetricsPointInTimeRecoveryEnabled();
+
+ void setWorkerMetricsPointInTimeRecoveryEnabled(Boolean value);
+
+ Boolean getWorkerMetricsDeletionProtectionEnabled();
+
+ void setWorkerMetricsDeletionProtectionEnabled(Boolean value);
+
+ TagCollection getWorkerMetricsTags();
+
+ void setWorkerMetricsTags(TagCollection value);
+ }
+
+ @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "tableName")
+ private String workerMetricsTableName;
+
+ @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "billingMode")
+ private BillingMode workerMetricsBillingMode;
+
+ @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "readCapacity")
+ private long workerMetricsReadCapacity;
+
+ @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "writeCapacity")
+ private long workerMetricsWriteCapacity;
+
+ @ConfigurationSettable(
+ configurationClass = WorkerMetricsTableConfig.class,
+ methodName = "pointInTimeRecoveryEnabled")
+ private Boolean workerMetricsPointInTimeRecoveryEnabled;
+
+ @ConfigurationSettable(
+ configurationClass = WorkerMetricsTableConfig.class,
+ methodName = "deletionProtectionEnabled")
+ private Boolean workerMetricsDeletionProtectionEnabled;
+
+ @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "tags")
+ private TagCollection workerMetricsTags;
+}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBean.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBean.java
new file mode 100644
index 000000000..fc3352837
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBean.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.multilang.config;
+
+import java.time.Duration;
+
+import lombok.Getter;
+import lombok.Setter;
+import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
+
+@Getter
+@Setter
+public class WorkerUtilizationAwareAssignmentConfigBean {
+
+ interface WorkerUtilizationAwareAssignmentConfigBeanDelegate {
+ long getInMemoryWorkerMetricsCaptureFrequencyMillis();
+
+ void setInMemoryWorkerMetricsCaptureFrequencyMillis(long value);
+
+ long getWorkerMetricsReporterFreqInMillis();
+
+ void setWorkerMetricsReporterFreqInMillis(long value);
+
+ int getNoOfPersistedMetricsPerWorkerMetrics();
+
+ void setNoOfPersistedMetricsPerWorkerMetrics(int value);
+
+ Boolean getDisableWorkerMetrics();
+
+ void setDisableWorkerMetrics(Boolean value);
+
+ double getMaxThroughputPerHostKBps();
+
+ void setMaxThroughputPerHostKBps(double value);
+
+ int getDampeningPercentage();
+
+ void setDampeningPercentage(int value);
+
+ int getReBalanceThresholdPercentage();
+
+ void setReBalanceThresholdPercentage(int value);
+
+ Boolean getAllowThroughputOvershoot();
+
+ void setAllowThroughputOvershoot(Boolean value);
+
+ int getVarianceBalancingFrequency();
+
+ void setVarianceBalancingFrequency(int value);
+
+ double getWorkerMetricsEMAAlpha();
+
+ void setWorkerMetricsEMAAlpha(double value);
+
+ void setStaleWorkerMetricsEntryCleanupDuration(Duration value);
+
+ Duration getStaleWorkerMetricsEntryCleanupDuration();
+ }
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private long inMemoryWorkerMetricsCaptureFrequencyMillis;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private long workerMetricsReporterFreqInMillis;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private int noOfPersistedMetricsPerWorkerMetrics;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private Boolean disableWorkerMetrics;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private double maxThroughputPerHostKBps;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private int dampeningPercentage;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private int reBalanceThresholdPercentage;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private Boolean allowThroughputOvershoot;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private int varianceBalancingFrequency;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private double workerMetricsEMAAlpha;
+
+ @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
+ private Duration staleWorkerMetricsEntryCleanupDuration;
+}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/converter/DurationConverter.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/converter/DurationConverter.java
new file mode 100644
index 000000000..fc763d165
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/converter/DurationConverter.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.multilang.config.converter;
+
+import java.time.Duration;
+
+import org.apache.commons.beanutils.Converter;
+
+/**
+ * Converter that converts Duration text representation to a Duration object.
+ * Refer to {@code Duration.parse} javadocs for the exact text representation.
+ */
+public class DurationConverter implements Converter {
+
+ @Override
+ public T convert(Class type, Object value) {
+ if (value == null) {
+ return null;
+ }
+
+ if (type != Duration.class) {
+ throw new ConversionException("Can only convert to Duration");
+ }
+
+ String durationString = value.toString().trim();
+ final Duration duration = Duration.parse(durationString);
+ if (duration.isNegative()) {
+ throw new ConversionException("Negative values are not permitted for duration: " + durationString);
+ }
+
+ return type.cast(duration);
+ }
+
+ public static class ConversionException extends RuntimeException {
+ public ConversionException(String message) {
+ super(message);
+ }
+ }
+}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/converter/TagConverter.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/converter/TagConverter.java
new file mode 100644
index 000000000..936f67ec6
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/converter/TagConverter.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.multilang.config.converter;
+
+import java.util.ArrayList;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.beanutils.Converter;
+import software.amazon.awssdk.services.dynamodb.model.Tag;
+
+/**
+ * Converter that converts text to a collection of {@code Tag} objects.
+ * The accepted text format is as follows (malformed pairs are logged and skipped):
+ * tagPropertyName = key1=value1,key2=value2,...
+ */
+@Slf4j
+public class TagConverter implements Converter {
+
+ @Override
+ public T convert(Class type, Object value) {
+ if (value == null) {
+ return null;
+ }
+
+ if (!type.isAssignableFrom(TagCollection.class)) {
+ throw new ConversionException("Can only convert to Collection");
+ }
+
+ final TagCollection collection = new TagCollection();
+ final String tagString = value.toString().trim();
+ final String[] keyValuePairs = tagString.split(",");
+ for (String keyValuePair : keyValuePairs) {
+ final String[] tokens = keyValuePair.trim().split("=");
+ if (tokens.length != 2) {
+ log.warn("Invalid tag {}, ignoring it", keyValuePair);
+ continue;
+ }
+ final Tag tag =
+ Tag.builder().key(tokens[0].trim()).value(tokens[1].trim()).build();
+ log.info("Created tag {}", tag);
+ collection.add(tag);
+ }
+
+ return type.cast(collection);
+ }
+
+ public static class ConversionException extends RuntimeException {
+ public ConversionException(String message) {
+ super(message);
+ }
+ }
+
+ public static class TagCollection extends ArrayList {}
+}
diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/credentials/V2CredentialWrapper.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/credentials/V2CredentialWrapper.java
deleted file mode 100644
index e1b6072af..000000000
--- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/credentials/V2CredentialWrapper.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright 2019 Amazon.com, Inc. or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package software.amazon.kinesis.multilang.config.credentials;
-
-import com.amazonaws.auth.AWSCredentials;
-import com.amazonaws.auth.AWSCredentialsProvider;
-import com.amazonaws.auth.AWSSessionCredentials;
-import lombok.RequiredArgsConstructor;
-import software.amazon.awssdk.auth.credentials.AwsCredentials;
-import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
-import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
-
-@RequiredArgsConstructor
-public class V2CredentialWrapper implements AwsCredentialsProvider {
-
- private final AWSCredentialsProvider oldCredentialsProvider;
-
- @Override
- public AwsCredentials resolveCredentials() {
- AWSCredentials current = oldCredentialsProvider.getCredentials();
- if (current instanceof AWSSessionCredentials) {
- return AwsSessionCredentials.create(
- current.getAWSAccessKeyId(),
- current.getAWSSecretKey(),
- ((AWSSessionCredentials) current).getSessionToken());
- }
- return new AwsCredentials() {
- @Override
- public String accessKeyId() {
- return current.getAWSAccessKeyId();
- }
-
- @Override
- public String secretAccessKey() {
- return current.getAWSSecretKey();
- }
- };
- }
-}
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonConfigTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonConfigTest.java
index de5a1405c..53b7f2d80 100644
--- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonConfigTest.java
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonConfigTest.java
@@ -65,7 +65,7 @@ public void setup(String streamName, String streamArn) throws IOException {
String properties = String.format(
"executableName = %s\n"
+ "applicationName = %s\n"
- + "AWSCredentialsProvider = DefaultAWSCredentialsProviderChain\n"
+ + "AwsCredentialsProvider = DefaultCredentialsProvider\n"
+ "processingLanguage = malbolge\n"
+ "regionName = %s\n",
EXE, APPLICATION_NAME, "us-east-1");
@@ -182,7 +182,7 @@ private void assertConfigurationsMatch(String expectedStreamName, String expecte
@Test
public void testPropertyValidation() {
String propertiesNoExecutableName = "applicationName = testApp \n" + "streamName = fakeStream \n"
- + "AWSCredentialsProvider = DefaultAWSCredentialsProviderChain\n" + "processingLanguage = malbolge";
+ + "AwsCredentialsProvider = DefaultCredentialsProvider\n" + "processingLanguage = malbolge";
ClassLoader classLoader = Mockito.mock(ClassLoader.class);
Mockito.doReturn(new ByteArrayInputStream(propertiesNoExecutableName.getBytes()))
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonTest.java
index 3e689437c..453f81aa7 100644
--- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonTest.java
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonTest.java
@@ -157,7 +157,7 @@ public void testNoPropertiesFileArgumentOrOption() {
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
- daemon.propertiesFile(arguments);
+ daemon.validateAndGetPropertiesFileName(arguments);
}
@Test
@@ -166,7 +166,7 @@ public void testSuccessfulPropertiesArgument() {
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
arguments.parameters = Collections.singletonList(expectedPropertiesFile);
- String propertiesFile = daemon.propertiesFile(arguments);
+ String propertiesFile = daemon.validateAndGetPropertiesFileName(arguments);
assertThat(propertiesFile, equalTo(expectedPropertiesFile));
}
@@ -180,7 +180,7 @@ public void testPropertiesOptionsOverrideArgument() {
arguments.parameters = Collections.singletonList(propertiesArgument);
arguments.propertiesFile = propertiesOptions;
- String propertiesFile = daemon.propertiesFile(arguments);
+ String propertiesFile = daemon.validateAndGetPropertiesFileName(arguments);
assertThat(propertiesFile, equalTo(propertiesOptions));
}
@@ -193,7 +193,7 @@ public void testExtraArgumentsFailure() {
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
arguments.parameters = Arrays.asList("parameter1", "parameter2");
- daemon.propertiesFile(arguments);
+ daemon.validateAndGetPropertiesFileName(arguments);
}
@Test
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/NestedPropertyKeyTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/NestedPropertyKeyTest.java
index fbffee816..3c2de9c98 100644
--- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/NestedPropertyKeyTest.java
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/NestedPropertyKeyTest.java
@@ -14,11 +14,11 @@
*/
package software.amazon.kinesis.multilang;
-import com.amazonaws.regions.Regions;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
+import software.amazon.awssdk.regions.Region;
import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.verify;
@@ -64,9 +64,9 @@ public void testInvalidEndpointDoubleCaret() {
@Test
public void testEndpointRegion() {
- final Regions expectedRegion = Regions.GovCloud;
+ final Region expectedRegion = Region.US_GOV_WEST_1;
- parse(mockProcessor, createKey(ENDPOINT_REGION, expectedRegion.getName()));
+ parse(mockProcessor, createKey(ENDPOINT_REGION, expectedRegion.id()));
verify(mockProcessor).acceptEndpointRegion(expectedRegion);
}
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProviderTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProviderTest.java
similarity index 80%
rename from amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProviderTest.java
rename to amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProviderTest.java
index c27a425d3..9a4d5b0c5 100644
--- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProviderTest.java
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProviderTest.java
@@ -20,7 +20,7 @@
import static org.junit.Assert.assertEquals;
-public class KclSTSAssumeRoleSessionCredentialsProviderTest {
+public class KclStsAssumeRoleCredentialsProviderTest {
private static final String ARN = "arn";
private static final String SESSION_NAME = "sessionName";
@@ -31,29 +31,29 @@ public class KclSTSAssumeRoleSessionCredentialsProviderTest {
*/
@Test
public void testConstructorWithoutOptionalParams() {
- new KclSTSAssumeRoleSessionCredentialsProvider(new String[] {ARN, SESSION_NAME});
+ new KclStsAssumeRoleCredentialsProvider(new String[] {ARN, SESSION_NAME, "endpointRegion=us-east-1"});
}
@Test
public void testAcceptEndpoint() {
// discovered exception during e2e testing; therefore, this test is
// to simply verify the constructed STS client doesn't go *boom*
- final KclSTSAssumeRoleSessionCredentialsProvider provider =
- new KclSTSAssumeRoleSessionCredentialsProvider(ARN, SESSION_NAME);
+ final KclStsAssumeRoleCredentialsProvider provider =
+ new KclStsAssumeRoleCredentialsProvider(ARN, SESSION_NAME, "endpointRegion=us-east-1");
provider.acceptEndpoint("endpoint", "us-east-1");
}
@Test
public void testVarArgs() {
for (final String[] varargs : Arrays.asList(
- new String[] {ARN, SESSION_NAME, "externalId=eid", "foo"},
- new String[] {ARN, SESSION_NAME, "foo", "externalId=eid"})) {
+ new String[] {ARN, SESSION_NAME, "externalId=eid", "foo", "endpointRegion=us-east-1"},
+ new String[] {ARN, SESSION_NAME, "foo", "externalId=eid", "endpointRegion=us-east-1"})) {
final VarArgsSpy provider = new VarArgsSpy(varargs);
assertEquals("eid", provider.externalId);
}
}
- private static class VarArgsSpy extends KclSTSAssumeRoleSessionCredentialsProvider {
+ private static class VarArgsSpy extends KclStsAssumeRoleCredentialsProvider {
private String externalId;
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoderTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoderTest.java
similarity index 51%
rename from amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoderTest.java
rename to amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoderTest.java
index ba5a0925f..40b0366c9 100644
--- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoderTest.java
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoderTest.java
@@ -16,16 +16,17 @@
import java.util.Arrays;
-import com.amazonaws.auth.AWSCredentials;
-import com.amazonaws.auth.AWSCredentialsProvider;
-import com.amazonaws.auth.AWSCredentialsProviderChain;
-import com.amazonaws.auth.BasicAWSCredentials;
import lombok.ToString;
import org.hamcrest.Description;
import org.hamcrest.Matcher;
import org.hamcrest.TypeSafeDiagnosingMatcher;
import org.junit.Test;
-import software.amazon.kinesis.multilang.auth.KclSTSAssumeRoleSessionCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain;
+import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
+import software.amazon.kinesis.multilang.auth.KclStsAssumeRoleCredentialsProvider;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.instanceOf;
@@ -33,31 +34,32 @@
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;
-public class AWSCredentialsProviderPropertyValueDecoderTest {
+public class AwsCredentialsProviderPropertyValueDecoderTest {
private static final String TEST_ACCESS_KEY_ID = "123";
private static final String TEST_SECRET_KEY = "456";
private final String credentialName1 = AlwaysSucceedCredentialsProvider.class.getName();
private final String credentialName2 = ConstructorCredentialsProvider.class.getName();
- private final AWSCredentialsProviderPropertyValueDecoder decoder = new AWSCredentialsProviderPropertyValueDecoder();
+ private final String createCredentialClass = CreateProvider.class.getName();
+ private final AwsCredentialsProviderPropertyValueDecoder decoder = new AwsCredentialsProviderPropertyValueDecoder();
@ToString
- private static class AWSCredentialsMatcher extends TypeSafeDiagnosingMatcher {
+ private static class AwsCredentialsMatcher extends TypeSafeDiagnosingMatcher {
private final Matcher akidMatcher;
private final Matcher secretMatcher;
private final Matcher> classMatcher;
- public AWSCredentialsMatcher(String akid, String secret) {
+ public AwsCredentialsMatcher(String akid, String secret) {
this.akidMatcher = equalTo(akid);
this.secretMatcher = equalTo(secret);
- this.classMatcher = instanceOf(AWSCredentialsProviderChain.class);
+ this.classMatcher = instanceOf(AwsCredentialsProviderChain.class);
}
@Override
- protected boolean matchesSafely(AWSCredentialsProvider item, Description mismatchDescription) {
- AWSCredentials actual = item.getCredentials();
+ protected boolean matchesSafely(AwsCredentialsProvider item, Description mismatchDescription) {
+ AwsCredentials actual = item.resolveCredentials();
boolean matched = true;
if (!classMatcher.matches(item)) {
@@ -65,12 +67,12 @@ protected boolean matchesSafely(AWSCredentialsProvider item, Description mismatc
matched = false;
}
- if (!akidMatcher.matches(actual.getAWSAccessKeyId())) {
- akidMatcher.describeMismatch(actual.getAWSAccessKeyId(), mismatchDescription);
+ if (!akidMatcher.matches(actual.accessKeyId())) {
+ akidMatcher.describeMismatch(actual.accessKeyId(), mismatchDescription);
matched = false;
}
- if (!secretMatcher.matches(actual.getAWSSecretKey())) {
- secretMatcher.describeMismatch(actual.getAWSSecretKey(), mismatchDescription);
+ if (!secretMatcher.matches(actual.secretAccessKey())) {
+ secretMatcher.describeMismatch(actual.secretAccessKey(), mismatchDescription);
matched = false;
}
return matched;
@@ -79,36 +81,36 @@ protected boolean matchesSafely(AWSCredentialsProvider item, Description mismatc
@Override
public void describeTo(Description description) {
description
- .appendText("An AWSCredentialsProvider that provides an AWSCredential matching: ")
+ .appendText("An AwsCredentialsProvider that provides an AwsCredential matching: ")
.appendList("(", ", ", ")", Arrays.asList(classMatcher, akidMatcher, secretMatcher));
}
}
- private static AWSCredentialsMatcher hasCredentials(String akid, String secret) {
- return new AWSCredentialsMatcher(akid, secret);
+ private static AwsCredentialsMatcher hasCredentials(String akid, String secret) {
+ return new AwsCredentialsMatcher(akid, secret);
}
@Test
public void testSingleProvider() {
- AWSCredentialsProvider provider = decoder.decodeValue(credentialName1);
+ AwsCredentialsProvider provider = decoder.decodeValue(credentialName1);
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
}
@Test
public void testTwoProviders() {
- AWSCredentialsProvider provider = decoder.decodeValue(credentialName1 + "," + credentialName1);
+ AwsCredentialsProvider provider = decoder.decodeValue(credentialName1 + "," + credentialName1);
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
}
@Test
public void testProfileProviderWithOneArg() {
- AWSCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg");
+ AwsCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg");
assertThat(provider, hasCredentials("arg", "blank"));
}
@Test
public void testProfileProviderWithTwoArgs() {
- AWSCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg1|arg2");
+ AwsCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg1|arg2");
assertThat(provider, hasCredentials("arg1", "arg2"));
}
@@ -118,14 +120,34 @@ public void testProfileProviderWithTwoArgs() {
@Test
public void testKclAuthProvider() {
for (final String className : Arrays.asList(
- KclSTSAssumeRoleSessionCredentialsProvider.class.getName(), // fully-qualified name
- KclSTSAssumeRoleSessionCredentialsProvider.class.getSimpleName() // name-only; needs prefix
- )) {
- final AWSCredentialsProvider provider = decoder.decodeValue(className + "|arn|sessionName");
+ KclStsAssumeRoleCredentialsProvider.class.getName(), // fully-qualified name
+ KclStsAssumeRoleCredentialsProvider.class.getSimpleName(), // name-only; needs prefix
+ StsAssumeRoleCredentialsProvider.class.getName(), // user passes full sts package path
+ StsAssumeRoleCredentialsProvider.class.getSimpleName())) {
+ final AwsCredentialsProvider provider =
+ decoder.decodeValue(className + "|arn|sessionName|endpointRegion=us-east-1");
assertNotNull(className, provider);
}
}
+ /**
+ * Test that a provider exposing only a static no-arg create() factory (AWS SDK v2 style) can be decoded
+ */
+ @Test
+ public void testEmptyCreateProvider() {
+ AwsCredentialsProvider provider = decoder.decodeValue(createCredentialClass);
+ assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
+ }
+
+ /**
+ * Test that a provider exposing a static one-arg create(arg1) factory (AWS SDK v2 style) can be decoded
+ */
+ @Test
+ public void testOneArgCreateProvider() {
+ AwsCredentialsProvider provider = decoder.decodeValue(createCredentialClass + "|testCreateProperty");
+ assertThat(provider, hasCredentials("testCreateProperty", TEST_SECRET_KEY));
+ }
+
/**
* Test that a provider can be instantiated by its varargs constructor.
*/
@@ -135,28 +157,24 @@ public void testVarArgAuthProvider() {
final String className = VarArgCredentialsProvider.class.getName();
final String encodedValue = className + "|" + String.join("|", args);
- final AWSCredentialsProvider provider = decoder.decodeValue(encodedValue);
- assertEquals(Arrays.toString(args), provider.getCredentials().getAWSAccessKeyId());
+ final AwsCredentialsProvider provider = decoder.decodeValue(encodedValue);
+ assertEquals(Arrays.toString(args), provider.resolveCredentials().accessKeyId());
}
/**
* This credentials provider will always succeed
*/
- public static class AlwaysSucceedCredentialsProvider implements AWSCredentialsProvider {
-
+ public static class AlwaysSucceedCredentialsProvider implements AwsCredentialsProvider {
@Override
- public AWSCredentials getCredentials() {
- return new BasicAWSCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY);
+ public AwsCredentials resolveCredentials() {
+ return AwsBasicCredentials.create(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY);
}
-
- @Override
- public void refresh() {}
}
/**
* This credentials provider needs a constructor call to instantiate it
*/
- public static class ConstructorCredentialsProvider implements AWSCredentialsProvider {
+ public static class ConstructorCredentialsProvider implements AwsCredentialsProvider {
private String arg1;
private String arg2;
@@ -172,15 +190,12 @@ public ConstructorCredentialsProvider(String arg1, String arg2) {
}
@Override
- public AWSCredentials getCredentials() {
- return new BasicAWSCredentials(arg1, arg2);
+ public AwsCredentials resolveCredentials() {
+ return AwsBasicCredentials.create(arg1, arg2);
}
-
- @Override
- public void refresh() {}
}
- private static class VarArgCredentialsProvider implements AWSCredentialsProvider {
+ private static class VarArgCredentialsProvider implements AwsCredentialsProvider {
private final String[] args;
@@ -189,13 +204,34 @@ public VarArgCredentialsProvider(final String[] args) {
}
@Override
- public AWSCredentials getCredentials() {
+ public AwsCredentials resolveCredentials() {
// KISS solution to surface the constructor args
final String flattenedArgs = Arrays.toString(args);
- return new BasicAWSCredentials(flattenedArgs, flattenedArgs);
+ return AwsBasicCredentials.create(flattenedArgs, flattenedArgs);
+ }
+ }
+
+ /**
+ * Credentials provider to test AWS SDK v2 create() methods for providers like ProfileCredentialsProvider
+ */
+ public static class CreateProvider implements AwsCredentialsProvider {
+ private String accessKeyId;
+
+ private CreateProvider(String accessKeyId) {
+ this.accessKeyId = accessKeyId;
+ }
+
+ public static CreateProvider create() {
+ return new CreateProvider(TEST_ACCESS_KEY_ID);
+ }
+
+ public static CreateProvider create(String accessKeyId) {
+ return new CreateProvider(accessKeyId);
}
@Override
- public void refresh() {}
+ public AwsCredentials resolveCredentials() {
+ return AwsBasicCredentials.create(accessKeyId, TEST_SECRET_KEY);
+ }
}
}
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtilsTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtilsTest.java
index 5e0db340c..cee3cad27 100644
--- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtilsTest.java
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtilsTest.java
@@ -52,6 +52,16 @@ public void testPrimitivesSet() {
assertThat(actual, equalTo(expected));
}
+ @Test
+ public void testBoolean() {
+ ConfigResult expected = ConfigResult.builder().bool(false).build();
+
+ ConfigObject configObject = ConfigObject.builder().bool(expected.bool).build();
+ ConfigResult actual = resolve(configObject);
+
+ assertThat(actual, equalTo(expected));
+ }
+
@Test
public void testHeapValuesSet() {
ConfigResult expected =
@@ -147,6 +157,9 @@ public static class ConfigResult {
private Long boxedLong;
private ComplexValue complexValue;
+ @Builder.Default
+ private Boolean bool = true;
+
private Optional optionalString;
private Optional optionalInteger;
private Optional optionalLong;
@@ -175,6 +188,10 @@ public static class ConfigObject {
@ConfigurationSettable(configurationClass = ConfigResult.class)
private int rawInt;
+ @ConfigurationSettable(configurationClass = ConfigResult.class)
+ @Builder.Default
+ private Boolean bool = true;
+
@ConfigurationSettable(configurationClass = ConfigResult.class)
private Integer boxedInt;
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfiguratorTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfiguratorTest.java
index b0e3b870b..a72b1a960 100644
--- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfiguratorTest.java
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfiguratorTest.java
@@ -20,19 +20,21 @@
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
+import java.util.NoSuchElementException;
import java.util.Set;
-import com.amazonaws.auth.AWSCredentials;
-import com.amazonaws.auth.AWSCredentialsProvider;
-import com.amazonaws.auth.BasicAWSCredentials;
import com.google.common.collect.ImmutableSet;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.runners.MockitoJUnitRunner;
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
import software.amazon.kinesis.common.InitialPositionInStream;
+import software.amazon.kinesis.coordinator.CoordinatorConfig;
import software.amazon.kinesis.metrics.MetricsLevel;
import static org.hamcrest.CoreMatchers.equalTo;
@@ -40,6 +42,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -60,7 +63,7 @@ public void testWithBasicSetup() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = " + credentialName1,
+ "AwsCredentialsProvider = " + credentialName1,
"workerId = 123"
},
'\n'));
@@ -69,6 +72,8 @@ public void testWithBasicSetup() {
assertEquals(config.getWorkerIdentifier(), "123");
assertThat(config.getMaxGetRecordsThreadPool(), nullValue());
assertThat(config.getRetryGetRecordsInSeconds(), nullValue());
+ assertNull(config.getGracefulLeaseHandoffTimeoutMillis());
+ assertNull(config.getIsGracefulLeaseHandoffEnabled());
}
@Test
@@ -77,7 +82,7 @@ public void testWithLongVariables() {
new String[] {
"applicationName = app",
"streamName = 123",
- "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
"workerId = 123",
"failoverTimeMillis = 100",
"shardSyncIntervalMillis = 500"
@@ -98,7 +103,7 @@ public void testWithInitialPositionInStreamExtended() {
new String[] {
"applicationName = app",
"streamName = 123",
- "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
"initialPositionInStreamExtended = " + epochTimeInSeconds
},
'\n'));
@@ -116,7 +121,7 @@ public void testInvalidInitialPositionInStream() {
new String[] {
"applicationName = app",
"streamName = 123",
- "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
"initialPositionInStream = AT_TIMESTAMP"
},
'\n'));
@@ -136,7 +141,7 @@ public void testInvalidInitialPositionInStreamExtended() {
new String[] {
"applicationName = app",
"streamName = 123",
- "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
"initialPositionInStreamExtended = null"
},
'\n'));
@@ -147,11 +152,156 @@ public void testInvalidInitialPositionInStreamExtended() {
}
}
+ @Test
+ public void testGracefulLeaseHandoffConfig() {
+ final Long testGracefulLeaseHandoffTimeoutMillis = 12345L;
+ final boolean testGracefulLeaseHandoffEnabled = true;
+
+ final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
+ new String[] {
+ "applicationName = dummyApplicationName",
+ "streamName = dummyStreamName",
+ "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "gracefulLeaseHandoffTimeoutMillis = " + testGracefulLeaseHandoffTimeoutMillis,
+ "isGracefulLeaseHandoffEnabled = " + testGracefulLeaseHandoffEnabled
+ },
+ '\n'));
+
+ assertEquals(testGracefulLeaseHandoffTimeoutMillis, config.getGracefulLeaseHandoffTimeoutMillis());
+ assertEquals(testGracefulLeaseHandoffEnabled, config.getIsGracefulLeaseHandoffEnabled());
+ }
+
+ @Test
+ public void testClientVersionConfig() {
+ final CoordinatorConfig.ClientVersionConfig testClientVersionConfig = Arrays.stream(
+ CoordinatorConfig.ClientVersionConfig.values())
+ .findAny()
+ .orElseThrow(NoSuchElementException::new);
+
+ final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
+ new String[] {
+ "applicationName = dummyApplicationName",
+ "streamName = dummyStreamName",
+ "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "clientVersionConfig = " + testClientVersionConfig.name()
+ },
+ '\n'));
+
+ assertEquals(testClientVersionConfig, config.getClientVersionConfig());
+ }
+
+ @Test
+ public void testCoordinatorStateConfig() {
+ final String testCoordinatorStateTableName = "CoordState";
+ final BillingMode testCoordinatorStateBillingMode = BillingMode.PAY_PER_REQUEST;
+ final long testCoordinatorStateReadCapacity = 123;
+ final long testCoordinatorStateWriteCapacity = 123;
+
+ final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
+ new String[] {
+ "applicationName = dummyApplicationName",
+ "streamName = dummyStreamName",
+ "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "coordinatorStateTableName = " + testCoordinatorStateTableName,
+ "coordinatorStateBillingMode = " + testCoordinatorStateBillingMode.name(),
+ "coordinatorStateReadCapacity = " + testCoordinatorStateReadCapacity,
+ "coordinatorStateWriteCapacity = " + testCoordinatorStateWriteCapacity
+ },
+ '\n'));
+
+ assertEquals(testCoordinatorStateTableName, config.getCoordinatorStateTableName());
+ assertEquals(testCoordinatorStateBillingMode, config.getCoordinatorStateBillingMode());
+ assertEquals(testCoordinatorStateReadCapacity, config.getCoordinatorStateReadCapacity());
+ assertEquals(testCoordinatorStateWriteCapacity, config.getCoordinatorStateWriteCapacity());
+ }
+
+ @Test
+ public void testWorkerUtilizationAwareAssignmentConfig() {
+ final long testInMemoryWorkerMetricsCaptureFrequencyMillis = 123;
+ final long testWorkerMetricsReporterFreqInMillis = 123;
+ final long testNoOfPersistedMetricsPerWorkerMetrics = 123;
+ final Boolean testDisableWorkerMetrics = true;
+ final double testMaxThroughputPerHostKBps = 123;
+ final long testDampeningPercentage = 12;
+ final long testReBalanceThresholdPercentage = 12;
+ final Boolean testAllowThroughputOvershoot = false;
+ final long testVarianceBalancingFrequency = 12;
+ final double testWorkerMetricsEMAAlpha = .123;
+
+ final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
+ new String[] {
+ "applicationName = dummyApplicationName",
+ "streamName = dummyStreamName",
+ "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "inMemoryWorkerMetricsCaptureFrequencyMillis = " + testInMemoryWorkerMetricsCaptureFrequencyMillis,
+ "workerMetricsReporterFreqInMillis = " + testWorkerMetricsReporterFreqInMillis,
+ "noOfPersistedMetricsPerWorkerMetrics = " + testNoOfPersistedMetricsPerWorkerMetrics,
+ "disableWorkerMetrics = " + testDisableWorkerMetrics,
+ "maxThroughputPerHostKBps = " + testMaxThroughputPerHostKBps,
+ "dampeningPercentage = " + testDampeningPercentage,
+ "reBalanceThresholdPercentage = " + testReBalanceThresholdPercentage,
+ "allowThroughputOvershoot = " + testAllowThroughputOvershoot,
+ "varianceBalancingFrequency = " + testVarianceBalancingFrequency,
+ "workerMetricsEMAAlpha = " + testWorkerMetricsEMAAlpha
+ },
+ '\n'));
+
+ assertEquals(
+ testInMemoryWorkerMetricsCaptureFrequencyMillis,
+ config.getInMemoryWorkerMetricsCaptureFrequencyMillis());
+ assertEquals(testWorkerMetricsReporterFreqInMillis, config.getWorkerMetricsReporterFreqInMillis());
+ assertEquals(testNoOfPersistedMetricsPerWorkerMetrics, config.getNoOfPersistedMetricsPerWorkerMetrics());
+ assertEquals(testDisableWorkerMetrics, config.getDisableWorkerMetrics());
+ assertEquals(testMaxThroughputPerHostKBps, config.getMaxThroughputPerHostKBps(), 0.0001);
+ assertEquals(testDampeningPercentage, config.getDampeningPercentage());
+ assertEquals(testReBalanceThresholdPercentage, config.getReBalanceThresholdPercentage());
+ assertEquals(testAllowThroughputOvershoot, config.getAllowThroughputOvershoot());
+ assertEquals(testVarianceBalancingFrequency, config.getVarianceBalancingFrequency());
+ assertEquals(testWorkerMetricsEMAAlpha, config.getWorkerMetricsEMAAlpha(), 0.0001);
+ }
+
+ @Test
+ public void testWorkerMetricsConfig() {
+ final String testWorkerMetricsTableName = "CoordState";
+ final BillingMode testWorkerMetricsBillingMode = BillingMode.PROVISIONED;
+ final long testWorkerMetricsReadCapacity = 123;
+ final long testWorkerMetricsWriteCapacity = 123;
+
+ final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
+ new String[] {
+ "applicationName = dummyApplicationName",
+ "streamName = dummyStreamName",
+ "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "workerMetricsTableName = " + testWorkerMetricsTableName,
+ "workerMetricsBillingMode = " + testWorkerMetricsBillingMode.name(),
+ "workerMetricsReadCapacity = " + testWorkerMetricsReadCapacity,
+ "workerMetricsWriteCapacity = " + testWorkerMetricsWriteCapacity
+ },
+ '\n'));
+
+ assertEquals(testWorkerMetricsTableName, config.getWorkerMetricsTableName());
+ assertEquals(testWorkerMetricsBillingMode, config.getWorkerMetricsBillingMode());
+ assertEquals(testWorkerMetricsReadCapacity, config.getWorkerMetricsReadCapacity());
+ assertEquals(testWorkerMetricsWriteCapacity, config.getWorkerMetricsWriteCapacity());
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testInvalidClientVersionConfig() {
+ getConfiguration(StringUtils.join(
+ new String[] {
+ "applicationName = dummyApplicationName",
+ "streamName = dummyStreamName",
+ "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "clientVersionConfig = " + "invalid_client_version_config"
+ },
+ '\n'));
+ }
+
@Test
public void testWithUnsupportedClientConfigurationVariables() {
MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
new String[] {
- "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
+ "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
"workerId = id",
"kinesisClientConfig = {}",
"streamName = stream",
@@ -170,7 +320,7 @@ public void testWithIntVariables() {
MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
new String[] {
"streamName = kinesis",
- "AWSCredentialsProvider = " + credentialName2 + ", " + credentialName1,
+ "AwsCredentialsProvider = " + credentialName2 + ", " + credentialName1,
"workerId = w123",
"maxRecords = 10",
"metricsMaxQueueSize = 20",
@@ -195,7 +345,7 @@ public void testWithBooleanVariables() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = ABCD, " + credentialName1,
+ "AwsCredentialsProvider = ABCD, " + credentialName1,
"workerId = 0",
"cleanupLeasesUponShardCompletion = false",
"validateSequenceNumberBeforeCheckpointing = true"
@@ -215,7 +365,7 @@ public void testWithStringVariables() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = ABCD," + credentialName1,
+ "AwsCredentialsProvider = ABCD," + credentialName1,
"workerId = 1",
"kinesisEndpoint = https://kinesis",
"metricsLevel = SUMMARY"
@@ -233,7 +383,7 @@ public void testWithSetVariables() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = ABCD," + credentialName1,
+ "AwsCredentialsProvider = ABCD," + credentialName1,
"workerId = 1",
"metricsEnabledDimensions = ShardId, WorkerIdentifier"
},
@@ -253,7 +403,7 @@ public void testWithInitialPositionInStreamTrimHorizon() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = ABCD," + credentialName1,
+ "AwsCredentialsProvider = ABCD," + credentialName1,
"workerId = 123",
"initialPositionInStream = TriM_Horizon"
},
@@ -268,7 +418,7 @@ public void testWithInitialPositionInStreamLatest() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = ABCD," + credentialName1,
+ "AwsCredentialsProvider = ABCD," + credentialName1,
"workerId = 123",
"initialPositionInStream = LateSt"
},
@@ -283,7 +433,7 @@ public void testSkippingNonKCLVariables() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = ABCD," + credentialName1,
+ "AwsCredentialsProvider = ABCD," + credentialName1,
"workerId = 123",
"initialPositionInStream = TriM_Horizon",
"abc = 1"
@@ -302,7 +452,7 @@ public void testEmptyOptionalVariables() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = ABCD," + credentialName1,
+ "AwsCredentialsProvider = ABCD," + credentialName1,
"workerId = 123",
"initialPositionInStream = TriM_Horizon",
"maxGetRecordsThreadPool = 1"
@@ -318,7 +468,7 @@ public void testWithZeroValue() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = ABCD," + credentialName1,
+ "AwsCredentialsProvider = ABCD," + credentialName1,
"workerId = 123",
"initialPositionInStream = TriM_Horizon",
"maxGetRecordsThreadPool = 0",
@@ -334,7 +484,7 @@ public void testWithInvalidIntValue() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = " + credentialName1,
+ "AwsCredentialsProvider = " + credentialName1,
"workerId = 123",
"failoverTimeMillis = 100nf"
},
@@ -348,7 +498,7 @@ public void testWithNegativeIntValue() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = " + credentialName1,
+ "AwsCredentialsProvider = " + credentialName1,
"workerId = 123",
"failoverTimeMillis = -12"
},
@@ -380,7 +530,7 @@ public void testWithMissingWorkerId() {
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = " + credentialName1,
+ "AwsCredentialsProvider = " + credentialName1,
"failoverTimeMillis = 100",
"shardSyncIntervalMillis = 500"
},
@@ -397,7 +547,7 @@ public void testWithMissingStreamNameAndMissingStreamArn() {
String test = StringUtils.join(
new String[] {
"applicationName = b",
- "AWSCredentialsProvider = " + credentialName1,
+ "AwsCredentialsProvider = " + credentialName1,
"workerId = 123",
"failoverTimeMillis = 100"
},
@@ -410,7 +560,7 @@ public void testWithEmptyStreamNameAndMissingStreamArn() {
String test = StringUtils.join(
new String[] {
"applicationName = b",
- "AWSCredentialsProvider = " + credentialName1,
+ "AwsCredentialsProvider = " + credentialName1,
"workerId = 123",
"failoverTimeMillis = 100",
"streamName = ",
@@ -425,7 +575,7 @@ public void testWithMissingApplicationName() {
String test = StringUtils.join(
new String[] {
"streamName = a",
- "AWSCredentialsProvider = " + credentialName1,
+ "AwsCredentialsProvider = " + credentialName1,
"workerId = 123",
"failoverTimeMillis = 100"
},
@@ -434,12 +584,12 @@ public void testWithMissingApplicationName() {
}
@Test
- public void testWithAWSCredentialsFailed() {
+ public void testWithAwsCredentialsFailed() {
String test = StringUtils.join(
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = " + credentialName2,
+ "AwsCredentialsProvider = " + credentialName2,
"failoverTimeMillis = 100",
"shardSyncIntervalMillis = 500"
},
@@ -457,16 +607,44 @@ public void testWithAWSCredentialsFailed() {
}
}
+    @Test
+    public void testProcessKeyWithExpectedCasing() {
+        // The canonical spelling of the key must come back as "awsCredentialsProvider".
+        final String processedKey = configurator.processKey("AwsCredentialsProvider");
+        assertEquals("awsCredentialsProvider", processedKey);
+    }
+
+    @Test
+    public void testProcessKeyWithOldCasing() {
+        // The previous all-caps "AWS" spelling must still be accepted and map to the same key.
+        final String processedKey = configurator.processKey("AWSCredentialsProvider");
+        assertEquals("awsCredentialsProvider", processedKey);
+    }
+
+    @Test
+    public void testProcessKeyWithMixedCasing() {
+        // Arbitrary casing of the whole key is expected to produce the same result.
+        final String processedKey = configurator.processKey("AwScReDeNtIaLsPrOvIdEr");
+        assertEquals("awsCredentialsProvider", processedKey);
+    }
+
+    @Test
+    public void testProcessKeyWithSuffix() {
+        // Only the credentials-provider prefix is re-cased; the "DynamoDB" suffix is kept as written.
+        final String processedKey = configurator.processKey("awscredentialsproviderDynamoDB");
+        assertEquals("awsCredentialsProviderDynamoDB", processedKey);
+    }
+
// TODO: fix this test
@Test
- public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatch() {
+ public void testWithDifferentAwsCredentialsForDynamoDBAndCloudWatch() {
String test = StringUtils.join(
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = " + credentialNameKinesis,
- "AWSCredentialsProviderDynamoDB = " + credentialNameDynamoDB,
- "AWSCredentialsProviderCloudWatch = " + credentialNameCloudWatch,
+ "AwsCredentialsProvider = " + credentialNameKinesis,
+ "AwsCredentialsProviderDynamoDB = " + credentialNameDynamoDB,
+ "AwsCredentialsProviderCloudWatch = " + credentialNameCloudWatch,
"failoverTimeMillis = 100",
"shardSyncIntervalMillis = 500"
},
@@ -487,14 +665,14 @@ public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatch() {
// TODO: fix this test
@Test
- public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatchFailed() {
+ public void testWithDifferentAwsCredentialsForDynamoDBAndCloudWatchFailed() {
String test = StringUtils.join(
new String[] {
"streamName = a",
"applicationName = b",
- "AWSCredentialsProvider = " + credentialNameKinesis,
- "AWSCredentialsProviderDynamoDB = " + credentialName2,
- "AWSCredentialsProviderCloudWatch = " + credentialName2,
+ "AwsCredentialsProvider = " + credentialNameKinesis,
+ "AwsCredentialsProviderDynamoDB = " + credentialName2,
+ "AwsCredentialsProviderCloudWatch = " + credentialName2,
"failoverTimeMillis = 100",
"shardSyncIntervalMillis = 500"
},
@@ -526,71 +704,52 @@ public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatchFailed() {
/**
* This credentials provider will always succeed
*/
- public static class AlwaysSucceedCredentialsProvider implements AWSCredentialsProvider {
-
+ public static class AlwaysSucceedCredentialsProvider implements AwsCredentialsProvider {
@Override
- public AWSCredentials getCredentials() {
- return new BasicAWSCredentials("a", "b");
+ public AwsCredentials resolveCredentials() {
+ return AwsBasicCredentials.create("a", "b");
}
-
- @Override
- public void refresh() {}
}
/**
* This credentials provider will always succeed
*/
- public static class AlwaysSucceedCredentialsProviderKinesis implements AWSCredentialsProvider {
-
+ public static class AlwaysSucceedCredentialsProviderKinesis implements AwsCredentialsProvider {
@Override
- public AWSCredentials getCredentials() {
- return new BasicAWSCredentials("", "");
+ public AwsCredentials resolveCredentials() {
+ return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY");
}
-
- @Override
- public void refresh() {}
}
/**
* This credentials provider will always succeed
*/
- public static class AlwaysSucceedCredentialsProviderDynamoDB implements AWSCredentialsProvider {
-
+ public static class AlwaysSucceedCredentialsProviderDynamoDB implements AwsCredentialsProvider {
@Override
- public AWSCredentials getCredentials() {
- return new BasicAWSCredentials("", "");
+ public AwsCredentials resolveCredentials() {
+ return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY");
}
-
- @Override
- public void refresh() {}
}
/**
* This credentials provider will always succeed
*/
- public static class AlwaysSucceedCredentialsProviderCloudWatch implements AWSCredentialsProvider {
-
+ public static class AlwaysSucceedCredentialsProviderCloudWatch implements AwsCredentialsProvider {
@Override
- public AWSCredentials getCredentials() {
- return new BasicAWSCredentials("", "");
+ public AwsCredentials resolveCredentials() {
+ return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY");
}
-
- @Override
- public void refresh() {}
}
/**
* This credentials provider will always fail
*/
- public static class AlwaysFailCredentialsProvider implements AWSCredentialsProvider {
+ public static class AlwaysFailCredentialsProvider implements AwsCredentialsProvider {
@Override
- public AWSCredentials getCredentials() {
+ public AwsCredentials resolveCredentials() {
throw new IllegalArgumentException();
}
-
- @Override
- public void refresh() {}
}
private MultiLangDaemonConfiguration getConfiguration(String configString) {
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfigurationTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfigurationTest.java
index 1c45eb6e8..60a55c65a 100644
--- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfigurationTest.java
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfigurationTest.java
@@ -15,6 +15,9 @@
package software.amazon.kinesis.multilang.config;
+import java.util.Arrays;
+import java.util.NoSuchElementException;
+
import org.apache.commons.beanutils.BeanUtilsBean;
import org.apache.commons.beanutils.ConvertUtilsBean;
import org.junit.After;
@@ -24,8 +27,16 @@
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;
import org.mockito.Mock;
+import org.mockito.Mockito;
import org.mockito.runners.MockitoJUnitRunner;
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
+import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient;
+import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
+import software.amazon.kinesis.common.ConfigsBuilder;
+import software.amazon.kinesis.coordinator.CoordinatorConfig;
+import software.amazon.kinesis.leases.LeaseManagementConfig;
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
import software.amazon.kinesis.retrieval.fanout.FanOutConfig;
import software.amazon.kinesis.retrieval.polling.PollingConfig;
@@ -34,6 +45,7 @@
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
@@ -41,6 +53,8 @@
public class MultiLangDaemonConfigurationTest {
private static final String AWS_REGION_PROPERTY_NAME = "aws.region";
+ private static final String DUMMY_APPLICATION_NAME = "dummyApplicationName";
+ private static final String DUMMY_STREAM_NAME = "dummyStreamName";
private BeanUtilsBean utilsBean;
private ConvertUtilsBean convertUtilsBean;
@@ -71,8 +85,8 @@ public void after() {
public MultiLangDaemonConfiguration baseConfiguration() {
MultiLangDaemonConfiguration configuration = new MultiLangDaemonConfiguration(utilsBean, convertUtilsBean);
- configuration.setApplicationName("Test");
- configuration.setStreamName("Test");
+ configuration.setApplicationName(DUMMY_APPLICATION_NAME);
+ configuration.setStreamName(DUMMY_STREAM_NAME);
configuration.getKinesisCredentialsProvider().set("class", DefaultCredentialsProvider.class.getName());
return configuration;
@@ -111,6 +125,197 @@ public void testSetLeaseTableDeletionProtectionEnabledToTrue() {
assertTrue(resolvedConfiguration.leaseManagementConfig.leaseTableDeletionProtectionEnabled());
}
+    /**
+     * Graceful lease handoff values set on the daemon configuration must appear in the resolved config.
+     */
+    @Test
+    public void testGracefulLeaseHandoffConfig() {
+        // Derive test values from the ConfigsBuilder defaults so they are guaranteed to differ from them.
+        final LeaseManagementConfig.GracefulLeaseHandoffConfig defaults =
+                getTestConfigsBuilder().leaseManagementConfig().gracefulLeaseHandoffConfig();
+        final long expectedTimeoutMillis = defaults.gracefulLeaseHandoffTimeoutMillis() + 12345;
+        final boolean expectedEnabled = !defaults.isGracefulLeaseHandoffEnabled();
+
+        final MultiLangDaemonConfiguration daemonConfiguration = baseConfiguration();
+        daemonConfiguration.setGracefulLeaseHandoffTimeoutMillis(expectedTimeoutMillis);
+        daemonConfiguration.setIsGracefulLeaseHandoffEnabled(expectedEnabled);
+
+        // Resolve the daemon configuration and read back its graceful-handoff section.
+        final LeaseManagementConfig.GracefulLeaseHandoffConfig resolved = daemonConfiguration
+                .resolvedConfiguration(shardRecordProcessorFactory)
+                .leaseManagementConfig
+                .gracefulLeaseHandoffConfig();
+
+        assertEquals(expectedTimeoutMillis, resolved.gracefulLeaseHandoffTimeoutMillis());
+        assertEquals(expectedEnabled, resolved.isGracefulLeaseHandoffEnabled());
+    }
+
+    @Test
+    public void testGracefulLeaseHandoffUsesDefaults() {
+        // Nothing is overridden here, so the resolved section should equal the ConfigsBuilder default.
+        final LeaseManagementConfig.GracefulLeaseHandoffConfig actual = baseConfiguration()
+                .resolvedConfiguration(shardRecordProcessorFactory)
+                .leaseManagementConfig
+                .gracefulLeaseHandoffConfig();
+
+        final LeaseManagementConfig.GracefulLeaseHandoffConfig expected =
+                getTestConfigsBuilder().leaseManagementConfig().gracefulLeaseHandoffConfig();
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testWorkerUtilizationAwareAssignmentConfig() {
+        MultiLangDaemonConfiguration configuration = baseConfiguration();
+        // Override the worker-utilization settings that are asserted below.
+        configuration.setInMemoryWorkerMetricsCaptureFrequencyMillis(123);
+        configuration.setWorkerMetricsReporterFreqInMillis(123);
+        configuration.setNoOfPersistedMetricsPerWorkerMetrics(123);
+        configuration.setDisableWorkerMetrics(true);
+        configuration.setMaxThroughputPerHostKBps(.123);
+        configuration.setDampeningPercentage(12);
+        configuration.setReBalanceThresholdPercentage(12);
+        configuration.setAllowThroughputOvershoot(false);
+        configuration.setVarianceBalancingFrequency(12);
+        configuration.setWorkerMetricsEMAAlpha(.123);
+
+        MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
+                configuration.resolvedConfiguration(shardRecordProcessorFactory);
+        LeaseManagementConfig leaseManagementConfig = resolvedConfiguration.leaseManagementConfig;
+        LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config =
+                leaseManagementConfig.workerUtilizationAwareAssignmentConfig();
+        // assertEquals takes (expected, actual[, delta]); the delta is tiny so a wrong double cannot pass.
+        assertEquals(123, config.inMemoryWorkerMetricsCaptureFrequencyMillis());
+        assertEquals(123, config.workerMetricsReporterFreqInMillis());
+        assertEquals(123, config.noOfPersistedMetricsPerWorkerMetrics());
+        assertTrue(config.disableWorkerMetrics());
+        assertEquals(.123, config.maxThroughputPerHostKBps(), 1e-9);
+        assertEquals(12, config.dampeningPercentage());
+        assertEquals(12, config.reBalanceThresholdPercentage());
+        assertFalse(config.allowThroughputOvershoot());
+        assertEquals(12, config.varianceBalancingFrequency());
+        assertEquals(.123, config.workerMetricsEMAAlpha(), 1e-9);
+    }
+
+    @Test
+    public void testWorkerUtilizationAwareAssignmentConfigUsesDefaults() {
+        // Baseline: the assignment config produced by a plain ConfigsBuilder.
+        final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig expected =
+                getTestConfigsBuilder().leaseManagementConfig().workerUtilizationAwareAssignmentConfig();
+
+        // Override exactly one property with a value that cannot match its default.
+        final MultiLangDaemonConfiguration daemonConfiguration = baseConfiguration();
+        daemonConfiguration.setVarianceBalancingFrequency(expected.varianceBalancingFrequency() + 12345);
+
+        final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig actual = daemonConfiguration
+                .resolvedConfiguration(shardRecordProcessorFactory)
+                .leaseManagementConfig
+                .workerUtilizationAwareAssignmentConfig();
+
+        // The override must make the resolved config differ from the baseline ...
+        assertNotEquals(expected, actual);
+
+        // ... and once it is reverted, all other config values should be equal to the default.
+        actual.varianceBalancingFrequency(expected.varianceBalancingFrequency());
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testWorkerMetricsTableConfigBean() {
+        final BillingMode testWorkerMetricsTableBillingMode = BillingMode.PROVISIONED;
+
+        MultiLangDaemonConfiguration configuration = baseConfiguration();
+        // Set each worker-metrics table property that is asserted below.
+        configuration.setWorkerMetricsTableName("testTable");
+        configuration.setWorkerMetricsBillingMode(testWorkerMetricsTableBillingMode);
+        configuration.setWorkerMetricsReadCapacity(123);
+        configuration.setWorkerMetricsWriteCapacity(123);
+
+        MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
+                configuration.resolvedConfiguration(shardRecordProcessorFactory);
+        LeaseManagementConfig leaseManagementConfig = resolvedConfiguration.leaseManagementConfig;
+        LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationConfig =
+                leaseManagementConfig.workerUtilizationAwareAssignmentConfig();
+        LeaseManagementConfig.WorkerMetricsTableConfig workerMetricsConfig =
+                workerUtilizationConfig.workerMetricsTableConfig();
+        // assertEquals takes (expected, actual), so a failure message labels the two values correctly.
+        assertEquals("testTable", workerMetricsConfig.tableName());
+        assertEquals(testWorkerMetricsTableBillingMode, workerMetricsConfig.billingMode());
+        assertEquals(123, workerMetricsConfig.readCapacity());
+        assertEquals(123, workerMetricsConfig.writeCapacity());
+    }
+
+    @Test
+    public void testWorkerMetricsTableConfigUsesDefaults() {
+        final LeaseManagementConfig.WorkerMetricsTableConfig expected = getTestConfigsBuilder()
+                .leaseManagementConfig()
+                .workerUtilizationAwareAssignmentConfig()
+                .workerMetricsTableConfig();
+
+        final MultiLangDaemonConfiguration daemonConfiguration = baseConfiguration();
+        // Pick any billing mode other than the default one so the override is observable.
+        final BillingMode nonDefaultBillingMode = Arrays.stream(BillingMode.values())
+                .filter(candidate -> candidate != expected.billingMode())
+                .findFirst()
+                .orElseThrow(NoSuchElementException::new);
+        daemonConfiguration.setWorkerMetricsBillingMode(nonDefaultBillingMode);
+
+        final LeaseManagementConfig.WorkerMetricsTableConfig actual = daemonConfiguration
+                .resolvedConfiguration(shardRecordProcessorFactory)
+                .leaseManagementConfig
+                .workerUtilizationAwareAssignmentConfig()
+                .workerMetricsTableConfig();
+
+        assertNotEquals(expected, actual);
+
+        // Apart from the single override, all other config values should be equal to the default.
+        actual.billingMode(expected.billingMode());
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testCoordinatorStateTableConfigBean() {
+        final BillingMode testCoordinatorStateBillingMode = BillingMode.PROVISIONED;
+
+        MultiLangDaemonConfiguration configuration = baseConfiguration();
+        // PROVISIONED is used because PAY_PER_REQUEST appears to be the default; a dropped mapping now fails.
+        configuration.setCoordinatorStateTableName("testTable");
+        configuration.setCoordinatorStateBillingMode(testCoordinatorStateBillingMode);
+        configuration.setCoordinatorStateReadCapacity(123);
+        configuration.setCoordinatorStateWriteCapacity(123);
+
+        MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
+                configuration.resolvedConfiguration(shardRecordProcessorFactory);
+        CoordinatorConfig coordinatorConfig = resolvedConfiguration.getCoordinatorConfig();
+        CoordinatorConfig.CoordinatorStateTableConfig coordinatorStateConfig =
+                coordinatorConfig.coordinatorStateTableConfig();
+        assertEquals("testTable", coordinatorStateConfig.tableName());
+        assertEquals(testCoordinatorStateBillingMode, coordinatorStateConfig.billingMode());
+        assertEquals(123, coordinatorStateConfig.readCapacity());
+        assertEquals(123, coordinatorStateConfig.writeCapacity());
+    }
+
+    @Test
+    public void testCoordinatorStateTableConfigUsesDefaults() {
+        // Baseline: the coordinator state table config produced by a plain ConfigsBuilder.
+        final CoordinatorConfig.CoordinatorStateTableConfig expected =
+                getTestConfigsBuilder().coordinatorConfig().coordinatorStateTableConfig();
+
+        // Override only the write capacity, using a value that cannot match its default.
+        final MultiLangDaemonConfiguration daemonConfiguration = baseConfiguration();
+        daemonConfiguration.setCoordinatorStateWriteCapacity(expected.writeCapacity() + 12345);
+
+        final CoordinatorConfig.CoordinatorStateTableConfig actual = daemonConfiguration
+                .resolvedConfiguration(shardRecordProcessorFactory)
+                .coordinatorConfig
+                .coordinatorStateTableConfig();
+
+        assertNotEquals(expected, actual);
+        // Apart from the single override, all other config values should be equal to the default.
+        actual.writeCapacity(expected.writeCapacity());
+        assertEquals(expected, actual);
+    }
+
@Test
public void testSetLeaseTablePitrEnabledToTrue() {
MultiLangDaemonConfiguration configuration = baseConfiguration();
@@ -266,4 +471,43 @@ public void testFanoutConfigSetConsumerName() {
assertThat(fanOutConfig.consumerArn(), equalTo(consumerArn));
}
+
+    @Test
+    public void testClientVersionConfig() {
+        final CoordinatorConfig.ClientVersionConfig expectedClientVersionConfig =
+                CoordinatorConfig.ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X;
+
+        // Set the client version explicitly on the daemon configuration ...
+        final MultiLangDaemonConfiguration daemonConfiguration = baseConfiguration();
+        daemonConfiguration.setClientVersionConfig(expectedClientVersionConfig);
+
+        // ... and expect the resolved coordinator config to report the same value.
+        final CoordinatorConfig resolvedCoordinatorConfig =
+                daemonConfiguration.resolvedConfiguration(shardRecordProcessorFactory).coordinatorConfig;
+
+        assertEquals(expectedClientVersionConfig, resolvedCoordinatorConfig.clientVersionConfig());
+    }
+
+    @Test
+    public void testClientVersionConfigUsesDefault() {
+        // No client version is set, so the resolved value should equal the ConfigsBuilder default.
+        final CoordinatorConfig resolvedCoordinatorConfig =
+                baseConfiguration().resolvedConfiguration(shardRecordProcessorFactory).coordinatorConfig;
+        final CoordinatorConfig defaultCoordinatorConfig = getTestConfigsBuilder().coordinatorConfig();
+
+        assertEquals(
+                defaultCoordinatorConfig.clientVersionConfig(),
+                resolvedCoordinatorConfig.clientVersionConfig());
+    }
+
+    private ConfigsBuilder getTestConfigsBuilder() {
+        // Mocked service clients; callers in this test only read default config values from the builder.
+        final KinesisAsyncClient kinesisClient = Mockito.mock(KinesisAsyncClient.class);
+        final DynamoDbAsyncClient dynamoDbClient = Mockito.mock(DynamoDbAsyncClient.class);
+        final CloudWatchAsyncClient cloudWatchClient = Mockito.mock(CloudWatchAsyncClient.class);
+        return new ConfigsBuilder(
+                DUMMY_STREAM_NAME, DUMMY_APPLICATION_NAME,
+                kinesisClient, dynamoDbClient, cloudWatchClient,
+                "dummyWorkerIdentifier", shardRecordProcessorFactory);
+    }
}
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/PropertiesMappingE2ETest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/PropertiesMappingE2ETest.java
new file mode 100644
index 000000000..8a5c7c347
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/PropertiesMappingE2ETest.java
@@ -0,0 +1,303 @@
+package software.amazon.kinesis.multilang.config;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.Tag;
+import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
+import software.amazon.kinesis.multilang.MultiLangDaemonConfig;
+import software.amazon.kinesis.multilang.config.MultiLangDaemonConfiguration.ResolvedConfiguration;
+import software.amazon.kinesis.processor.ShardRecordProcessor;
+import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+ /** Loads multilang properties files and asserts the resolved KCL v3 configuration values. */
+ public class PropertiesMappingE2ETest {
+ private static final String PROPERTIES_FILE = "multilang.properties";
+ private static final String PROPERTIES_FILE_V3 = "multilangv3.properties";
+ // Loads PROPERTIES_FILE and asserts the specific values expected from it.
+ @Test
+ public void testKclV3PropertiesMapping() throws IOException {
+ final MultiLangDaemonConfig config = new MultiLangDaemonConfig(PROPERTIES_FILE);
+
+ final ResolvedConfiguration kclV3Config =
+ config.getMultiLangDaemonConfiguration().resolvedConfiguration(new TestRecordProcessorFactory());
+ // Coordinator config: client version, then the coordinator state table settings.
+ assertEquals(
+ ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X,
+ kclV3Config.coordinatorConfig.clientVersionConfig());
+
+ assertEquals(
+ "MultiLangTest-CoordinatorState-CustomName",
+ kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tableName());
+ assertEquals(
+ BillingMode.PROVISIONED,
+ kclV3Config.coordinatorConfig.coordinatorStateTableConfig().billingMode());
+ assertEquals(
+ 1000,
+ kclV3Config.coordinatorConfig.coordinatorStateTableConfig().readCapacity());
+ assertEquals(
+ 500, kclV3Config.coordinatorConfig.coordinatorStateTableConfig().writeCapacity());
+ assertTrue(kclV3Config.coordinatorConfig.coordinatorStateTableConfig().pointInTimeRecoveryEnabled());
+ assertTrue(kclV3Config.coordinatorConfig.coordinatorStateTableConfig().deletionProtectionEnabled());
+ assertEquals(
+ Arrays.asList(
+ Tag.builder().key("csTagK1").value("csTagV1").build(),
+ Tag.builder().key("csTagK2").value("csTagV2").build(),
+ Tag.builder().key("csTagK3").value("csTagV3").build()),
+ kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tags());
+ // Graceful lease handoff settings.
+ assertEquals(
+ 10000L,
+ kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().gracefulLeaseHandoffTimeoutMillis());
+ assertFalse(
+ kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().isGracefulLeaseHandoffEnabled());
+ // Worker-utilization-aware assignment settings.
+ assertEquals(
+ 5000L,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .inMemoryWorkerMetricsCaptureFrequencyMillis());
+ assertEquals(
+ 60000L,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsReporterFreqInMillis());
+ assertEquals(
+ 50,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .noOfPersistedMetricsPerWorkerMetrics());
+ assertTrue(kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .disableWorkerMetrics());
+ assertEquals(
+ 10000,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .maxThroughputPerHostKBps());
+ assertEquals(
+ 90,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .dampeningPercentage());
+ assertEquals(
+ 5,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .reBalanceThresholdPercentage());
+ assertFalse(kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .allowThroughputOvershoot());
+ assertEquals(
+ Duration.ofHours(12),
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .staleWorkerMetricsEntryCleanupDuration());
+ assertEquals(
+ 5,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .varianceBalancingFrequency());
+ assertEquals(
+ 0.18D,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsEMAAlpha());
+ // Worker metrics table settings.
+ assertEquals(
+ "MultiLangTest-WorkerMetrics-CustomName",
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .tableName());
+ assertEquals(
+ BillingMode.PROVISIONED,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .billingMode());
+ assertEquals(
+ 250,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .readCapacity());
+ assertEquals(
+ 90,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .writeCapacity());
+ assertTrue(kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .pointInTimeRecoveryEnabled());
+ assertTrue(kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .deletionProtectionEnabled());
+ assertEquals(
+ Arrays.asList(
+ Tag.builder().key("wmTagK1").value("wmTagV1").build(),
+ Tag.builder().key("wmTagK2").value("wmTagV2").build()),
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .tags());
+ }
+ // Loads PROPERTIES_FILE_V3 and asserts the values this test's name describes as defaults.
+ @Test
+ public void testKclV3PropertiesMappingForDefaultValues() throws IOException {
+ final MultiLangDaemonConfig config = new MultiLangDaemonConfig(PROPERTIES_FILE_V3);
+
+ final ResolvedConfiguration kclV3Config =
+ config.getMultiLangDaemonConfiguration().resolvedConfiguration(new TestRecordProcessorFactory());
+ // Coordinator config: client version, then the coordinator state table settings.
+ assertEquals(ClientVersionConfig.CLIENT_VERSION_CONFIG_3X, kclV3Config.coordinatorConfig.clientVersionConfig());
+
+ assertEquals(
+ "MultiLangTest-CoordinatorState",
+ kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tableName());
+ assertEquals(
+ BillingMode.PAY_PER_REQUEST,
+ kclV3Config.coordinatorConfig.coordinatorStateTableConfig().billingMode());
+ assertFalse(kclV3Config.coordinatorConfig.coordinatorStateTableConfig().pointInTimeRecoveryEnabled());
+ assertFalse(kclV3Config.coordinatorConfig.coordinatorStateTableConfig().deletionProtectionEnabled());
+ assertEquals(
+ Collections.emptyList(),
+ kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tags());
+ // Graceful lease handoff settings.
+ assertEquals(
+ 30_000L,
+ kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().gracefulLeaseHandoffTimeoutMillis());
+ assertTrue(
+ kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().isGracefulLeaseHandoffEnabled());
+ // Worker-utilization-aware assignment settings.
+ assertEquals(
+ 1000L,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .inMemoryWorkerMetricsCaptureFrequencyMillis());
+ assertEquals(
+ 30000L,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsReporterFreqInMillis());
+ assertEquals(
+ 10,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .noOfPersistedMetricsPerWorkerMetrics());
+ assertFalse(kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .disableWorkerMetrics());
+ assertEquals(
+ Double.MAX_VALUE,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .maxThroughputPerHostKBps());
+ assertEquals(
+ 60,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .dampeningPercentage());
+ assertEquals(
+ 10,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .reBalanceThresholdPercentage());
+ assertTrue(kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .allowThroughputOvershoot());
+ assertEquals(
+ Duration.ofDays(1),
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .staleWorkerMetricsEntryCleanupDuration());
+ assertEquals(
+ 3,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .varianceBalancingFrequency());
+ assertEquals(
+ 0.5D,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsEMAAlpha());
+ // Worker metrics table settings.
+ assertEquals(
+ "MultiLangTest-WorkerMetricStats",
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .tableName());
+ assertEquals(
+ BillingMode.PAY_PER_REQUEST,
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .billingMode());
+ assertFalse(kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .pointInTimeRecoveryEnabled());
+ assertFalse(kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .deletionProtectionEnabled());
+ assertEquals(
+ Collections.emptyList(),
+ kclV3Config
+ .leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .workerMetricsTableConfig()
+ .tags());
+ }
+ // Stub factory passed to resolvedConfiguration(...); it always returns a null processor.
+ private static class TestRecordProcessorFactory implements ShardRecordProcessorFactory {
+ @Override
+ public ShardRecordProcessor shardRecordProcessor() {
+ return null;
+ }
+ }
+}
diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBeanTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBeanTest.java
new file mode 100644
index 000000000..71ada01f1
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBeanTest.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.multilang.config;
+
+import java.util.Optional;
+
+import org.apache.commons.beanutils.BeanUtilsBean;
+import org.apache.commons.beanutils.ConvertUtilsBean;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
+import software.amazon.kinesis.retrieval.polling.PollingConfig;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.junit.Assert.assertThat;
+
+/**
+ * NOTE(review): despite the class name, every statement in this test builds and verifies a
+ * {@link PollingConfigBean} / {@link PollingConfig}; nothing here touches a
+ * WorkerUtilizationAwareAssignmentConfigBean. This looks like a copy of the polling-config
+ * bean test -- confirm the intended coverage.
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class WorkerUtilizationAwareAssignmentConfigBeanTest {
+
+ @Mock
+ private KinesisAsyncClient kinesisAsyncClient;
+
+ /**
+  * Sets four properties on a {@link PollingConfigBean}, builds a {@link PollingConfig} from it
+  * with the mocked client and a daemon configuration named "test-stream", and checks that the
+  * client, stream name and each of the four values are carried over to the built config.
+  */
+ @Test
+ public void testAllPropertiesTransit() {
+ PollingConfigBean pollingConfigBean = new PollingConfigBean();
+ pollingConfigBean.setIdleTimeBetweenReadsInMillis(1000);
+ pollingConfigBean.setMaxGetRecordsThreadPool(20);
+ pollingConfigBean.setMaxRecords(5000);
+ pollingConfigBean.setRetryGetRecordsInSeconds(30);
+
+ ConvertUtilsBean convertUtilsBean = new ConvertUtilsBean();
+ BeanUtilsBean utilsBean = new BeanUtilsBean(convertUtilsBean);
+
+ MultiLangDaemonConfiguration multiLangDaemonConfiguration =
+ new MultiLangDaemonConfiguration(utilsBean, convertUtilsBean);
+ multiLangDaemonConfiguration.setStreamName("test-stream");
+
+ PollingConfig pollingConfig = pollingConfigBean.build(kinesisAsyncClient, multiLangDaemonConfiguration);
+
+ assertThat(pollingConfig.kinesisClient(), equalTo(kinesisAsyncClient));
+ assertThat(pollingConfig.streamName(), equalTo(multiLangDaemonConfiguration.getStreamName()));
+ assertThat(
+ pollingConfig.idleTimeBetweenReadsInMillis(),
+ equalTo(pollingConfigBean.getIdleTimeBetweenReadsInMillis()));
+ // The two thread-pool/retry getters on PollingConfig are compared against Optional-wrapped values.
+ assertThat(
+ pollingConfig.maxGetRecordsThreadPool(),
+ equalTo(Optional.of(pollingConfigBean.getMaxGetRecordsThreadPool())));
+ assertThat(pollingConfig.maxRecords(), equalTo(pollingConfigBean.getMaxRecords()));
+ assertThat(
+ pollingConfig.retryGetRecordsInSeconds(),
+ equalTo(Optional.of(pollingConfigBean.getRetryGetRecordsInSeconds())));
+ }
+}
diff --git a/amazon-kinesis-client-multilang/src/test/resources/multilang.properties b/amazon-kinesis-client-multilang/src/test/resources/multilang.properties
index 34cb0c1a3..79ec03380 100644
--- a/amazon-kinesis-client-multilang/src/test/resources/multilang.properties
+++ b/amazon-kinesis-client-multilang/src/test/resources/multilang.properties
@@ -17,10 +17,12 @@ streamName = kclpysample
applicationName = MultiLangTest
# Users can change the credentials provider the KCL will use to retrieve credentials.
-# The DefaultAWSCredentialsProviderChain checks several other providers, which is
+# Expected key name (case-sensitive):
+# AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch
+# The DefaultCredentialsProvider checks several other providers, which is
# described here:
-# http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html
-AWSCredentialsProvider = DefaultAWSCredentialsProviderChain
+# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html
+AwsCredentialsProvider = DefaultCredentialsProvider
# Appended to the user agent of the KCL. Does not impact the functionality of the
# KCL in any other way.
@@ -91,3 +93,83 @@ validateSequenceNumberBeforeCheckpointing = true
# active threads set to the provided value. If a non-positive integer or no
# value is provided a CachedThreadPool is used.
maxActiveThreads = -1
+
+################### KclV3 configurations ###################
+# NOTE : These are just test configurations to show how to customize
+# all possible KCLv3 configurations. They are not necessarily the best
+# default values to use for production.
+
+# Coordinator config
+# Version the KCL needs to operate in. For more details check the KCLv3 migration
+# documentation. Default is CLIENT_VERSION_CONFIG_3X
+clientVersionConfig = CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2x
+# Configurations to control how the CoordinatorState DDB table is created
+# Default name is applicationName-CoordinatorState in PAY_PER_REQUEST,
+# with PITR and deletion protection disabled and no tags
+coordinatorStateTableName = MultiLangTest-CoordinatorState-CustomName
+coordinatorStateBillingMode = PROVISIONED
+coordinatorStateReadCapacity = 1000
+coordinatorStateWriteCapacity = 500
+coordinatorStatePointInTimeRecoveryEnabled = true
+coordinatorStateDeletionProtectionEnabled = true
+coordinatorStateTags = csTagK1=csTagV1,csTagK2=csTagV2,csTagK3=csTagV3
+
+# Graceful handoff config - tuning of the shutdown behavior during lease transfers
+# default values are 30000 and true respectively
+gracefulLeaseHandoffTimeoutMillis = 10000
+isGracefulLeaseHandoffEnabled = false
+
+# WorkerMetricStats table config - control how the DDB table is created
+# Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST,
+# with PITR and deletion protection disabled and no tags
+workerMetricsTableName = MultiLangTest-WorkerMetrics-CustomName
+workerMetricsBillingMode = PROVISIONED
+workerMetricsReadCapacity = 250
+workerMetricsWriteCapacity = 90
+workerMetricsPointInTimeRecoveryEnabled = true
+workerMetricsDeletionProtectionEnabled = true
+workerMetricsTags = wmTagK1=wmTagV1,wmTagK2=wmTagV2
+
+# WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm
+#
+# frequency of capturing worker metrics in memory. Default is 1s
+inMemoryWorkerMetricsCaptureFrequencyMillis = 5000
+# frequency of reporting worker metric stats to storage. Default is 30s
+workerMetricsReporterFreqInMillis = 60000
+# No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10
+noOfPersistedMetricsPerWorkerMetrics = 50
+# Disable use of worker metrics to balance lease, default is false.
+# If it is true, the algorithm balances lease based on worker's processing throughput.
+disableWorkerMetrics = true
+# Max throughput per host 10 MBps, to limit processing to the given value
+# Default is unlimited.
+maxThroughputPerHostKBps = 10000
+# Dampen the load that is rebalanced during lease re-balancing, default is 60%
+dampeningPercentage = 90
+# Configures the allowed variance range for worker utilization. The upper
+# limit is calculated as average * (1 + reBalanceThresholdPercentage/100).
+# The lower limit is average * (1 - reBalanceThresholdPercentage/100). If
+# any worker's utilization falls outside this range, lease re-balancing is
+# triggered. The re-balancing algorithm aims to bring variance within the
+# specified range. It also avoids thrashing by ensuring the utilization of
+# the worker receiving the load after re-balancing doesn't exceed the fleet
+# average. This might cause no re-balancing action even if the utilization is
+# out of the variance range. The default value is 10, representing +/-10%
+# variance from the average value.
+reBalanceThresholdPercentage = 5
+# Whether at least one lease must be taken from a high-utilization worker
+# during re-balancing when no lease assigned to that worker has a
+# throughput less than or equal to the minimum throughput that needs to be
+# moved away from that worker to bring the worker back into the allowed variance.
+# Default is true.
+allowThroughputOvershoot = false
+# Lease assignment is performed every failoverTimeMillis but re-balance will
+# be attempted only once in 5 times based on the below config. Default is 3.
+varianceBalancingFrequency = 5
+# Alpha value used for calculating exponential moving average of worker's metricStats.
+workerMetricsEMAAlpha = 0.18
+# Duration after which workerMetricStats entry from WorkerMetricStats table will
+# be cleaned up.
+# Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days)
+# Refer to Duration.parse javadocs for more details
+staleWorkerMetricsEntryCleanupDuration = PT12H
diff --git a/amazon-kinesis-client-multilang/src/test/resources/multilangv3.properties b/amazon-kinesis-client-multilang/src/test/resources/multilangv3.properties
new file mode 100644
index 000000000..42648da0f
--- /dev/null
+++ b/amazon-kinesis-client-multilang/src/test/resources/multilangv3.properties
@@ -0,0 +1,100 @@
+# The script that abides by the multi-language protocol. This script will
+# be executed by the MultiLangDaemon, which will communicate with this script
+# over STDIN and STDOUT according to the multi-language protocol.
+executableName = sample_kclpy_app.py
+
+# The Stream arn: arn:aws:kinesis:<region>:<account id>:stream/<stream name>
+# Important: streamArn takes precedence over streamName if both are set
+streamArn = arn:aws:kinesis:us-east-5:000000000000:stream/kclpysample
+
+# The name of an Amazon Kinesis stream to process.
+# Important: streamArn takes precedence over streamName if both are set
+streamName = kclpysample
+
+# Used by the KCL as the name of this application. Will be used as the name
+# of an Amazon DynamoDB table which will store the lease and checkpoint
+# information for workers with this application name
+applicationName = MultiLangTest
+
+# Users can change the credentials provider the KCL will use to retrieve credentials.
+# Expected key name (case-sensitive):
+# AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch
+# The DefaultCredentialsProvider checks several other providers, which is
+# described here:
+# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html
+AwsCredentialsProvider = DefaultCredentialsProvider
+
+# Appended to the user agent of the KCL. Does not impact the functionality of the
+# KCL in any other way.
+processingLanguage = python/3.8
+
+# Valid options are TRIM_HORIZON or LATEST.
+# See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
+initialPositionInStream = TRIM_HORIZON
+
+# To specify an initial timestamp from which to start processing records, please specify timestamp value for 'initialPositionInStreamExtended',
+# and uncomment below line with right timestamp value.
+# See more from 'Timestamp' under http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
+#initialPositionInStreamExtended = 1636609142
+
+# The following properties are also available for configuring the KCL Worker that is created
+# by the MultiLangDaemon.
+
+# The KCL defaults to us-east-1
+regionName = us-east-1
+
+# Fail over time in milliseconds. A worker which does not renew its lease within this time interval
+# will be regarded as having problems and its shards will be assigned to other workers.
+# For applications that have a large number of shards, this may be set to a higher number to reduce
+# the number of DynamoDB IOPS required for tracking leases
+failoverTimeMillis = 10000
+
+# A worker id that uniquely identifies this worker among all workers using the same applicationName
+# If this isn't provided a MultiLangDaemon instance will assign a unique workerId to itself.
+workerId = "workerId"
+
+# Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks.
+shardSyncIntervalMillis = 60000
+
+# Max records to fetch from Kinesis in a single GetRecords call.
+maxRecords = 10000
+
+# Idle time between record reads in milliseconds.
+idleTimeBetweenReadsInMillis = 1000
+
+# Enables applications to flush/checkpoint (if they have some data "in progress", but don't get new data for a while)
+callProcessRecordsEvenForEmptyRecordList = false
+
+# Interval in milliseconds between polling to check for parent shard completion.
+# Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on
+# completion of parent shards).
+parentShardPollIntervalMillis = 10000
+
+# Cleanup leases upon shards completion (don't wait until they expire in Kinesis).
+# Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try
+# to delete the ones we don't need any longer.
+cleanupLeasesUponShardCompletion = true
+
+# Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures).
+taskBackoffTimeMillis = 500
+
+# Buffer metrics for at most this long before publishing to CloudWatch.
+metricsBufferTimeMillis = 10000
+
+# Buffer at most this many metrics before publishing to CloudWatch.
+metricsMaxQueueSize = 10000
+
+# KCL will validate client provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls
+# to RecordProcessorCheckpointer#checkpoint(String) by default.
+validateSequenceNumberBeforeCheckpointing = true
+
+# The maximum number of active threads for the MultiLangDaemon to permit.
+# If a value is provided then a FixedThreadPool is used with the maximum
+# active threads set to the provided value. If a non-positive integer or no
+# value is provided a CachedThreadPool is used.
+maxActiveThreads = -1
+
+################### KclV3 configurations ###################
+# Coordinator config
+clientVersionConfig = CLIENT_VERSION_CONFIG_3x
+## Let all other KCLv3 config use defaults
\ No newline at end of file
diff --git a/amazon-kinesis-client/pom.xml b/amazon-kinesis-client/pom.xml
index b2efcae0a..567e0cdd1 100644
--- a/amazon-kinesis-client/pom.xml
+++ b/amazon-kinesis-client/pom.xml
@@ -23,7 +23,7 @@
software.amazon.kinesis
amazon-kinesis-client-pom
- 2.6.1-SNAPSHOT
+ 3.0.0
amazon-kinesis-client
@@ -68,6 +68,18 @@
dynamodb
${awssdk.version}
+
+
+ software.amazon.awssdk
+ dynamodb-enhanced
+ ${awssdk.version}
+
+
+
+ com.amazonaws
+ dynamodb-lock-client
+ 1.3.0
+
software.amazon.awssdk
cloudwatch
@@ -82,6 +94,12 @@
software.amazon.glue
schema-registry-serde
${gsr.version}
+
+
+ com.amazonaws
+ aws-java-sdk-sts
+
+
software.amazon.glue
@@ -103,11 +121,23 @@
commons-lang3
3.14.0
+
+
+ commons-collections
+ commons-collections
+ 3.2.2
+
org.slf4j
slf4j-api
${slf4j.version}
+
+
+ org.jetbrains
+ annotations
+ 26.0.1
+
io.reactivex.rxjava3
@@ -123,35 +153,47 @@
+
+
+ org.junit.jupiter
+ junit-jupiter-api
+ 5.11.3
+ test
+
junit
junit
4.13.2
test
-
+
+
+ org.junit.jupiter
+ junit-jupiter-params
+ 5.11.3
+ test
+
+
org.mockito
- mockito-all
- 1.10.19
+ mockito-junit-jupiter
+ 3.12.4
test
-
org.hamcrest
hamcrest-all
1.3
test
-
-
-
-
-
-
-
-
-
+
+
+
+ com.amazonaws
+ DynamoDBLocal
+ 1.25.0
+ test
+
ch.qos.logback
logback-classic
@@ -162,11 +204,11 @@
-
-
-
-
-
+
+
+
+
+
@@ -203,20 +245,20 @@
-
- org.xolstice.maven.plugins
- protobuf-maven-plugin
- 0.6.1
-
-
-
- compile
-
-
-
-
- com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}
-
+
+ org.xolstice.maven.plugins
+ protobuf-maven-plugin
+ 0.6.1
+
+
+
+ compile
+
+
+
+
+ com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}
+
org.apache.maven.plugins
diff --git a/amazon-kinesis-client/scripts/KclMigrationTool.py b/amazon-kinesis-client/scripts/KclMigrationTool.py
new file mode 100644
index 000000000..e72cd0a0a
--- /dev/null
+++ b/amazon-kinesis-client/scripts/KclMigrationTool.py
@@ -0,0 +1,609 @@
+"""
+Copyright 2024 Amazon.com, Inc. or its affiliates.
+Licensed under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import argparse
+import time
+
+from enum import Enum
+import boto3
+from botocore.config import Config
+from botocore.exceptions import ClientError
+
+# DynamoDB table suffixes
+DEFAULT_COORDINATOR_STATE_TABLE_SUFFIX = "-CoordinatorState"
+DEFAULT_WORKER_METRICS_TABLE_SUFFIX = "-WorkerMetricStats"
+
+# DynamoDB attribute names and values
+CLIENT_VERSION_ATTR = 'cv'
+TIMESTAMP_ATTR = 'mts'
+MODIFIED_BY_ATTR = 'mb'
+HISTORY_ATTR = 'h'
+MIGRATION_KEY = "Migration3.0"
+
+# GSI constants
+GSI_NAME = 'LeaseOwnerToLeaseKeyIndex'
+GSI_DELETION_WAIT_TIME_SECONDS = 120
+
+# botocore client configuration: 'standard' retry mode with max_attempts set to 10.
+config = Config(
+ retries = {
+ 'max_attempts': 10,
+ 'mode': 'standard'
+ }
+)
+
+# Client-version strings that this tool compares against, and writes to, the
+# client-version attribute of the migration item in the coordinator state table.
+class KclClientVersion(Enum):
+ VERSION_2X = "CLIENT_VERSION_2X"
+ UPGRADE_FROM_2X = "CLIENT_VERSION_UPGRADE_FROM_2X"
+ VERSION_3X_WITH_ROLLBACK = "CLIENT_VERSION_3X_WITH_ROLLBACK"
+ VERSION_3X = "CLIENT_VERSION_3X"
+
+ # Render a member as its raw string value rather than 'KclClientVersion.NAME'.
+ def __str__(self):
+ return self.value
+
+
+# Current time.time() converted to milliseconds, rounded to a whole number and returned
+# as a string (callers in this file place it directly into DynamoDB 'N' attribute values).
+def get_time_in_millis():
+ return str(round(time.time() * 1000))
+
+
+def is_valid_version(version, mode):
+ """
+ Validate if the given version is valid for the specified mode
+
+ :param version: The KCL client version to validate
+ :param mode: Either 'rollback' or 'rollforward'
+ :return: True if the version is valid for the given mode, False otherwise
+ """
+ if mode == 'rollback':
+ if version == KclClientVersion.VERSION_2X.value:
+ print("Your KCL application already runs in a mode compatible with KCL 2.x. You can deploy the code with the previous KCL version if you still experience an issue.")
+ return True
+ if version in [KclClientVersion.UPGRADE_FROM_2X.value,
+ KclClientVersion.VERSION_3X_WITH_ROLLBACK.value]:
+ return True
+ if version == KclClientVersion.VERSION_3X.value:
+ print("Cannot roll back the KCL application."
+ " It is not in a state that supports rollback.")
+ return False
+ print("Migration to KCL 3.0 not in progress or application_name / coordinator_state_table_name is incorrect."
+ " Please double check and run again with correct arguments.")
+ return False
+
+ if mode == 'rollforward':
+ if version == KclClientVersion.VERSION_2X.value:
+ return True
+ if version in [KclClientVersion.UPGRADE_FROM_2X.value,
+ KclClientVersion.VERSION_3X_WITH_ROLLBACK.value]:
+ print("Cannot roll-forward application. It is not in a rolled back state.")
+ return False
+ if version == KclClientVersion.VERSION_3X.value:
+ print("Cannot roll-forward the KCL application."
+ " Application has already migrated.")
+ return False
+ print("Cannot roll-forward because migration to KCL 3.0 is not in progress or application_name"
+ " / coordinator_state_table_name is incorrect. Please double check and run again with correct arguments.")
+ return False
+ print(f"Invalid mode: {mode}. Mode must be either 'rollback' or 'rollforward'.")
+ return False
+
+
+def handle_get_item_client_error(e, operation, table_name):
+ """
+ Handle ClientError exceptions raised by get_item on given DynamoDB table
+
+ :param e: The ClientError exception object
+ :param operation: Rollback or Roll-forward for logging the errors
+ :param table_name: The name of the DynamoDB table where the error occurred
+ """
+ error_code = e.response['Error']['Code']
+ error_message = e.response['Error']['Message']
+ print(f"{operation} could not be performed.")
+ if error_code == 'ProvisionedThroughputExceededException':
+ print(f"Throughput exceeded even after retries: {error_message}")
+ else:
+ print(f"Unexpected client error occurred: {error_code} - {error_message}")
+ print("Please resolve the issue and run the KclMigrationTool again.")
+
+
+def table_exists(dynamodb_client, table_name):
+ """
+ Check if a DynamoDB table exists.
+
+ :param dynamodb_client: Boto3 DynamoDB client
+ :param table_name: Name of the DynamoDB table to check
+ :return: True if the table exists, False otherwise
+ """
+ try:
+ dynamodb_client.describe_table(TableName=table_name)
+ return True
+ except ClientError as e:
+ if e.response['Error']['Code'] == 'ResourceNotFoundException':
+ print(f"Table '{table_name}' does not exist.")
+ return False
+ print(f"An error occurred while checking table '{table_name}': {e}.")
+ return False
+
+
+def validate_tables(dynamodb_client, operation, coordinator_state_table_name, lease_table_name=None):
+ """
+ Validate the existence of DynamoDB tables required for KCL operations
+
+ :param dynamodb_client: A boto3 DynamoDB client object
+ :param operation: Rollback or Roll-forward for logging
+ :param coordinator_state_table_name: Name of the coordinator state table
+ :param lease_table_name: Name of the DynamoDB KCL lease table (optional)
+ :return: True if all required tables exist, False otherwise
+ """
+ if lease_table_name and not table_exists(dynamodb_client, lease_table_name):
+ print(
+ f"{operation} failed. Could not find a KCL Application DDB lease table "
+ f"with name {lease_table_name}. Please pass in the correct application_name "
+ "and/or lease_table_name that matches your KCL application configuration."
+ )
+ return False
+
+ if not table_exists(dynamodb_client, coordinator_state_table_name):
+ print(
+ f"{operation} failed. Could not find a coordinator state table "
+ f"{coordinator_state_table_name}. Please pass in the correct application_name or"
+ f" coordinator_state_table_name that matches your KCL application configuration."
+ )
+ return False
+
+ return True
+
+
+def add_current_state_to_history(item, max_history=10):
+ """
+ Adds the current state of a DynamoDB item to its history attribute.
+ Creates a new history entry from the current value and maintains a capped history list.
+
+ :param item: DynamoDB item to add history to
+ :param max_history: Maximum number of history entries to maintain (default: 10)
+ :return: Updated history attribute as a DynamoDB-formatted dictionary
+ """
+ # Extract current values
+ current_version = item.get(CLIENT_VERSION_ATTR, {}).get('S', 'Unknown')
+ current_modified_by = item.get(MODIFIED_BY_ATTR, {}).get('S', 'Unknown')
+ current_time_in_millis = (
+ item.get(TIMESTAMP_ATTR, {}).get('N', get_time_in_millis())
+ )
+
+ # Create new history entry
+ new_entry = {
+ 'M': {
+ CLIENT_VERSION_ATTR: {'S': current_version},
+ MODIFIED_BY_ATTR: {'S': current_modified_by},
+ TIMESTAMP_ATTR: {'N': current_time_in_millis}
+ }
+ }
+
+ # Get existing history or create new if doesn't exist
+ history_dict = item.get(f'{HISTORY_ATTR}', {'L': []})
+ history_list = history_dict['L']
+
+ # Add new entry to the beginning of the list, capping at max_history
+ history_list.insert(0, new_entry)
+ history_list = history_list[:max_history]
+
+ return history_dict
+
+
+def get_current_state(dynamodb_client, table_name):
+ """
+ Retrieve the current state from the DynamoDB table and prepare history update.
+ Fetches the current item from the specified DynamoDB table,
+ extracts the initial client version, and creates a new history entry.
+
+ :param dynamodb_client: Boto3 DynamoDB client
+ :param table_name: Name of the DynamoDB table to query
+ :return: A tuple containing:
+ - initial_version (str): The current client version, or 'Unknown' if not found
+ - new_history (dict): Updated history including the current state
+ """
+ response = dynamodb_client.get_item(
+ TableName=table_name,
+ Key={'key': {'S': MIGRATION_KEY}}
+ )
+ item = response.get('Item', {})
+ initial_version = item.get(CLIENT_VERSION_ATTR, {}).get('S', 'Unknown')
+ new_history = add_current_state_to_history(item)
+ return initial_version, new_history
+
+
+def rollback_client_version(dynamodb_client, table_name, history):
+ """
+ Update the client version in the coordinator state table to initiate rollback.
+
+ :param dynamodb_client: Boto3 DynamoDB client
+ :param table_name: Name of the coordinator state DDB table
+ :param history: Updated history attribute as a DynamoDB-formatted dictionary
+ :return: A tuple containing:
+ - success (bool): True if client version was successfully updated, False otherwise
+ - previous_version (str): The version that was replaced, or None if update failed
+ """
+ try:
+ print(f"Rolling back client version in table '{table_name}'...")
+ update_response = dynamodb_client.update_item(
+ TableName=table_name,
+ Key={'key': {'S': MIGRATION_KEY}},
+ UpdateExpression=(
+ f"SET {CLIENT_VERSION_ATTR} = :rollback_client_version, "
+ f"{TIMESTAMP_ATTR} = :updated_at, "
+ f"{MODIFIED_BY_ATTR} = :modifier, "
+ f"{HISTORY_ATTR} = :history"
+ ),
+ ConditionExpression=(
+ f"{CLIENT_VERSION_ATTR} IN ("
+ ":upgrade_from_2x_client_version, "
+ ":3x_with_rollback_client_version)"
+ ),
+ ExpressionAttributeValues={
+ ':rollback_client_version': {'S': KclClientVersion.VERSION_2X.value},
+ ':updated_at': {'N': get_time_in_millis()},
+ ':modifier': {'S': 'KclMigrationTool-rollback'},
+ ':history': history,
+ ':upgrade_from_2x_client_version': (
+ {'S': KclClientVersion.UPGRADE_FROM_2X.value}
+ ),
+ ':3x_with_rollback_client_version': (
+ {'S': KclClientVersion.VERSION_3X_WITH_ROLLBACK.value}
+ ),
+ },
+ ReturnValues='UPDATED_OLD'
+ )
+ replaced_item = update_response.get('Attributes', {})
+ replaced_version = replaced_item.get('cv', {}).get('S', '')
+ return True, replaced_version
+ except ClientError as e:
+ if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
+ print("Unable to rollback, as application is not in a state that allows rollback."
+ "Ensure that the given application_name or coordinator_state_table_name is correct and"
+ " you have followed all prior migration steps.")
+ else:
+ print(f"An unexpected error occurred while rolling back: {str(e)}"
+ "Please resolve and run this migration script again.")
+ return False, None
+
+
+def rollfoward_client_version(dynamodb_client, table_name, history):
+ """
+ Update the client version in the coordinator state table to initiate roll-forward
+ conditionally if application is currently in rolled back state.
+
+ :param dynamodb_client: Boto3 DynamoDB client
+ :param table_name: Name of the coordinator state DDB table
+ :param history: Updated history attribute as a DynamoDB-formatted dictionary
+ :return: True if client version was successfully updated, False otherwise
+ """
+ try:
+ # Conditionally update client version
+ dynamodb_client.update_item(
+ TableName=table_name,
+ Key={'key': {'S': MIGRATION_KEY}},
+ UpdateExpression= (
+ f"SET {CLIENT_VERSION_ATTR} = :rollforward_version, "
+ f"{TIMESTAMP_ATTR} = :updated_at, "
+ f"{MODIFIED_BY_ATTR} = :modifier, "
+ f"{HISTORY_ATTR} = :new_history"
+ ),
+ ConditionExpression=f"{CLIENT_VERSION_ATTR} = :kcl_2x_version",
+ ExpressionAttributeValues={
+ ':rollforward_version': {'S': KclClientVersion.UPGRADE_FROM_2X.value},
+ ':updated_at': {'N': get_time_in_millis()},
+ ':modifier': {'S': 'KclMigrationTool-rollforward'},
+ ':new_history': history,
+ ':kcl_2x_version': {'S': KclClientVersion.VERSION_2X.value},
+ }
+ )
+ print("Roll-forward has been initiated. KCL application will monitor for 3.0 readiness and"
+ " automatically switch to 3.0 functionality when readiness criteria have been met.")
+ except ClientError as e:
+ if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
+ print("Unable to roll-forward because application is not in rolled back state."
+ " Ensure that the given application_name or coordinator_state_table_name is correct"
+ " and you have followed all prior migration steps.")
+ else:
+ print(f"Unable to roll-forward due to error: {str(e)}. "
+ "Please resolve and run this migration script again.")
+ except Exception as e:
+ print(f"Unable to roll-forward due to error: {str(e)}. "
+ "Please resolve and run this migration script again.")
+
+
+def delete_gsi_if_exists(dynamodb_client, table_name):
+ """
+ Deletes GSI on given lease table if it exists.
+
+ :param dynamodb_client: Boto3 DynamoDB client
+ :param table_name: Name of lease table to remove GSI from
+ """
+ try:
+ gsi_present = False
+ response = dynamodb_client.describe_table(TableName=table_name)
+ if 'GlobalSecondaryIndexes' in response['Table']:
+ gsi_list = response['Table']['GlobalSecondaryIndexes']
+ for gsi in gsi_list:
+ if gsi['IndexName'] == GSI_NAME:
+ gsi_present = True
+ break
+
+ if not gsi_present:
+ print(f"GSI {GSI_NAME} is not present on lease table {table_name}. It may already be successfully"
+ " deleted. Or if lease table name is incorrect, please re-run the KclMigrationTool with correct"
+ " application_name or lease_table_name.")
+ return
+ except ClientError as e:
+ if e.response['Error']['Code'] == 'ResourceNotFoundException':
+ print(f"Lease table {table_name} does not exist, please check application_name or lease_table_name"
+ " configuration and try again.")
+ return
+ else:
+ print(f"An unexpected error occurred while checking if GSI {GSI_NAME} exists"
+ f" on lease table {table_name}: {str(e)}. Please rectify the error and try again.")
+ return
+
+ print(f"Deleting GSI '{GSI_NAME}' from table '{table_name}'...")
+ try:
+ dynamodb_client.update_table(
+ TableName=table_name,
+ GlobalSecondaryIndexUpdates=[
+ {
+ 'Delete': {
+ 'IndexName': GSI_NAME
+ }
+ }
+ ]
+ )
+ except ClientError as e:
+ if e.response['Error']['Code'] == 'ResourceNotFoundException':
+ print(f"{GSI_NAME} not found or table '{table_name}' not found.")
+ elif e.response['Error']['Code'] == 'ResourceInUseException':
+ print(f"Unable to delete GSI: '{table_name}' is currently being modified.")
+ except Exception as e:
+ print(f"An unexpected error occurred while deleting GSI {GSI_NAME} on lease table {table_name}: {str(e)}."
+ " Please manually confirm the GSI is removed from the lease table, or"
+ " resolve the error and rerun the migration script.")
+
+
+def delete_worker_metrics_table_if_exists(dynamodb_client, worker_metrics_table_name):
+ """
+ Deletes worker metrics table based on application name, if it exists.
+
+ :param dynamodb_client: Boto3 DynamoDB client
+ :param worker_metrics_table_name: Name of the DynamoDB worker metrics table
+ """
+ try:
+ dynamodb_client.describe_table(TableName=worker_metrics_table_name)
+ except ClientError as e:
+ if e.response['Error']['Code'] == 'ResourceNotFoundException':
+ print(f"Worker metrics table {worker_metrics_table_name} does not exist."
+ " It may already be successfully deleted. Please check that the application_name"
+ " or worker_metrics_table_name is correct. If not, correct this and rerun the migration script.")
+ return
+ else:
+ print(f"An unexpected error occurred when checking if {worker_metrics_table_name} table exists: {str(e)}."
+ " Please manually confirm the table is deleted, or resolve the error"
+ " and rerun the migration script.")
+ return
+
+ print(f"Deleting worker metrics table {worker_metrics_table_name}...")
+ try:
+ dynamodb_client.delete_table(TableName=worker_metrics_table_name)
+ except ClientError as e:
+ if e.response['Error']['Code'] == 'AccessDeniedException':
+ print(f"No permissions to delete table {worker_metrics_table_name}. Please manually delete it if you"
+ " want to avoid any charges until you are ready to rollforward with migration.")
+ else:
+ print(f"An unexpected client error occurred while deleting worker metrics table: {str(e)}."
+ " Please manually confirm the table is deleted, or resolve the error"
+ " and rerun the migration script.")
+ except Exception as e:
+ print(f"An unexpected error occurred while deleting worker metrics table: {str(e)}."
+ " Please manually confirm the table is deleted, or resolve the error"
+ " and rerun the migration script.")
+
+
+def perform_rollback(dynamodb_client, lease_table_name, coordinator_state_table_name, worker_metrics_table_name):
+    """
+    Perform KCL 3.0 migration rollback by updating MigrationState for the KCL application.
+
+    Steps, in order: roll the client version back (skipped when the stored version is
+    already VERSION_2X), wait GSI_DELETION_WAIT_TIME_SECONDS, remove the GSI from the
+    lease table, then delete the worker metrics table. Returns early, without cleanup,
+    if table validation, the state lookup, the version check or the version rollback fails.
+
+    :param dynamodb_client: Boto3 DynamoDB client
+    :param lease_table_name: Name of the DynamoDB KCL lease table
+    :param coordinator_state_table_name: Name of the DynamoDB coordinator state table
+    :param worker_metrics_table_name: Name of the DynamoDB worker metrics table
+    """
+    tables_ok = validate_tables(dynamodb_client, "Rollback", coordinator_state_table_name, lease_table_name)
+    if not tables_ok:
+        return
+
+    try:
+        initial_version, new_history = get_current_state(dynamodb_client, coordinator_state_table_name)
+    except ClientError as e:
+        handle_get_item_client_error(e, "Rollback", coordinator_state_table_name)
+        return
+
+    version_ok = is_valid_version(version=initial_version, mode='rollback')
+    if not version_ok:
+        return
+
+    # 1. Rollback client version (nothing to do when the state already says 2x)
+    if initial_version != KclClientVersion.VERSION_2X.value:
+        rollback_succeeded, initial_version = rollback_client_version(
+            dynamodb_client, coordinator_state_table_name, new_history
+        )
+        if not rollback_succeeded:
+            return
+
+    print(f"Waiting for {GSI_DELETION_WAIT_TIME_SECONDS} seconds before cleaning up KCL 3.0 resources after rollback...")
+    time.sleep(GSI_DELETION_WAIT_TIME_SECONDS)
+
+    # 2. Delete the GSI
+    delete_gsi_if_exists(dynamodb_client, lease_table_name)
+
+    # 3. Delete worker metrics table
+    delete_worker_metrics_table_if_exists(dynamodb_client, worker_metrics_table_name)
+
+    # Pick the closing messages that match the version the application started from.
+    if initial_version == KclClientVersion.UPGRADE_FROM_2X.value:
+        summary_lines = [
+            "\nRollback completed. Your application was running 2x compatible functionality.",
+            "Please rollback to your previous application binaries by deploying the code with your previous KCL version.",
+        ]
+    elif initial_version == KclClientVersion.VERSION_3X_WITH_ROLLBACK.value:
+        summary_lines = [
+            "\nRollback completed. Your KCL Application was running 3x functionality and will rollback to 2x compatible functionality.",
+            "If you don't see mitigation after a short period of time,"
+            " please rollback to your previous application binaries by deploying the code with your previous KCL version.",
+        ]
+    elif initial_version == KclClientVersion.VERSION_2X.value:
+        summary_lines = [
+            "\nApplication was already rolled back. Any KCLv3 resources that could be deleted were cleaned up"
+            " to avoid charges until the application can be rolled forward with migration.",
+        ]
+    else:
+        summary_lines = []
+
+    for line in summary_lines:
+        print(line)
+
+
+def perform_rollforward(dynamodb_client, coordinator_state_table_name):
+    """
+    Perform KCL 3.0 migration roll-forward by updating MigrationState for the KCL application.
+
+    The coordinator state table is validated, the current state is read, and the stored
+    version is checked for roll-forward eligibility; any failure ends the call without
+    changing the client version.
+
+    :param dynamodb_client: Boto3 DynamoDB client
+    :param coordinator_state_table_name: Name of the DynamoDB table
+    """
+    tables_ok = validate_tables(dynamodb_client, "Roll-forward", coordinator_state_table_name)
+    if not tables_ok:
+        return
+
+    try:
+        initial_version, new_history = get_current_state(dynamodb_client, coordinator_state_table_name)
+    except ClientError as e:
+        handle_get_item_client_error(e, "Roll-forward", coordinator_state_table_name)
+        return
+
+    # Only move the client version forward when the stored version permits it.
+    if is_valid_version(version=initial_version, mode='rollforward'):
+        rollfoward_client_version(dynamodb_client, coordinator_state_table_name, new_history)
+
+
+def run_kcl_migration(mode, lease_table_name, coordinator_state_table_name, worker_metrics_table_name):
+    """
+    Update the MigrationState in CoordinatorState DDB Table.
+
+    A DynamoDB client is created from the module-level ``config`` and handed to the
+    handler for the requested mode. An unrecognised mode only prints a message.
+
+    :param mode: Either 'rollback' or 'rollforward'
+    :param lease_table_name: Name of the DynamoDB KCL lease table
+    :param coordinator_state_table_name: Name of the DynamoDB coordinator state table
+    :param worker_metrics_table_name: Name of the DynamoDB worker metrics table
+    """
+    dynamodb_client = boto3.client('dynamodb', config=config)
+
+    if mode == "rollback":
+        perform_rollback(
+            dynamodb_client,
+            lease_table_name,
+            coordinator_state_table_name,
+            worker_metrics_table_name
+        )
+        return
+
+    if mode == "rollforward":
+        perform_rollforward(dynamodb_client, coordinator_state_table_name)
+        return
+
+    print(f"Invalid mode: {mode}. Please use 'rollback' or 'rollforward'.")
+
+
+def validate_args(args):
+    """
+    Check that the parsed arguments identify every table the chosen mode needs.
+
+    For 'rollforward', an application name or a coordinator state table name is enough.
+    For any other mode, an application name is enough; without it, the lease,
+    coordinator state and worker metrics table names must all be supplied.
+
+    :param args: Parsed command line arguments
+    :raises ValueError: if the required names are missing for the mode
+    """
+    if args.mode == 'rollforward':
+        if args.application_name or args.coordinator_state_table_name:
+            return
+        raise ValueError(
+            "For rollforward mode, either application_name or "
+            "coordinator_state_table_name must be provided."
+        )
+
+    # Rollback path: the application name alone lets every table name be derived.
+    if args.application_name:
+        return
+
+    if (args.lease_table_name and
+            args.coordinator_state_table_name and
+            args.worker_metrics_table_name):
+        return
+
+    raise ValueError(
+        "For rollback mode, either application_name or all three table names "
+        "(lease_table_name, coordinator_state_table_name, and "
+        "worker_metrics_table_name) must be provided."
+    )
+
+
+def process_table_names(args):
+    """
+    Resolve the table names to operate on from the parsed command line arguments.
+
+    Explicitly supplied names win; otherwise a name is derived from the application
+    name. Lease and worker metrics table names are only resolved in rollback mode.
+    Args:
+        args: Parsed command line arguments
+    Returns:
+        tuple: (mode, lease_table_name, coordinator_state_table_name, worker_metrics_table_name)
+    """
+    mode = args.mode
+    app_name = args.application_name
+
+    # The default is only computed when no explicit name was given.
+    coordinator_table = args.coordinator_state_table_name
+    if not coordinator_table:
+        coordinator_table = app_name + DEFAULT_COORDINATOR_STATE_TABLE_SUFFIX
+
+    lease_table = None
+    worker_metrics_table = None
+    if mode == "rollback":
+        lease_table = args.lease_table_name or app_name
+        worker_metrics_table = args.worker_metrics_table_name
+        if not worker_metrics_table:
+            worker_metrics_table = app_name + DEFAULT_WORKER_METRICS_TABLE_SUFFIX
+
+    return mode, lease_table, coordinator_table, worker_metrics_table
+
+
+
+
+# Script entry point: parse the command line, validate it, then run the requested migration step.
+if __name__ == "__main__":
+ # RawDescriptionHelpFormatter keeps the line breaks of the description text in --help output.
+ parser = argparse.ArgumentParser(
+ description=
+ """
+ KCL Migration Tool
+ This tool facilitates the migration and rollback processes for Amazon KCLv3 applications.
+
+ Before running this tool:
+ 1. Ensure you have the necessary AWS permissions configured to access and modify the following:
+ - KCL application DynamoDB tables (lease table and coordinator state table)
+
+ 2. Verify that your AWS credentials are properly set up in your environment or AWS config file.
+
+ 3. Confirm that you have the correct KCL application name and lease table name (if configured in KCL).
+
+ Usage:
+ This tool supports two main operations: rollforward (upgrade) and rollback.
+ For detailed usage instructions, use the -h or --help option.
+ """,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ # --mode and --region are mandatory; the name arguments are optional here and checked by validate_args.
+ parser.add_argument("--mode", choices=['rollback', 'rollforward'], required=True,
+ help="Mode of operation: rollback or rollforward")
+ parser.add_argument("--application_name",
+ help="Name of the KCL application. This must match the application name "
+ "used in the KCL Library configurations.")
+ parser.add_argument("--lease_table_name",
+ help="Name of the DynamoDB lease table (defaults to applicationName)."
+ " If LeaseTable name was specified for the application as part of "
+ "the KCL configurations, the same name must be passed here.")
+ parser.add_argument("--coordinator_state_table_name",
+ help="Name of the DynamoDB coordinator state table "
+ "(defaults to applicationName-CoordinatorState)."
+ " If coordinator state table name was specified for the application "
+ "as part of the KCL configurations, the same name must be passed here.")
+ parser.add_argument("--worker_metrics_table_name",
+ help="Name of the DynamoDB worker metrics table "
+ "(defaults to applicationName-WorkerMetricStats)."
+ " If worker metrics table name was specified for the application "
+ "as part of the KCL configurations, the same name must be passed here.")
+ parser.add_argument("--region", required=True,
+ help="AWS Region where your KCL application exists")
+ args = parser.parse_args()
+ # validate_args raises ValueError when the table names needed for the mode cannot be determined.
+ validate_args(args)
+ # Set the region on the shared config object before run_kcl_migration builds the DynamoDB client from it.
+ config.region_name = args.region
+ # process_table_names returns (mode, lease, coordinator state, worker metrics) in run_kcl_migration's parameter order.
+ run_kcl_migration(*process_table_names(args))
\ No newline at end of file
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/ConfigsBuilder.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/ConfigsBuilder.java
index 2838d62dc..fcaec1977 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/ConfigsBuilder.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/ConfigsBuilder.java
@@ -256,7 +256,8 @@ public CoordinatorConfig coordinatorConfig() {
* @return LeaseManagementConfig
*/
public LeaseManagementConfig leaseManagementConfig() {
- return new LeaseManagementConfig(tableName(), dynamoDBClient(), kinesisClient(), workerIdentifier());
+ return new LeaseManagementConfig(
+ tableName(), applicationName(), dynamoDBClient(), kinesisClient(), workerIdentifier());
}
/**
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/DdbTableConfig.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/DdbTableConfig.java
new file mode 100644
index 000000000..4507d9616
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/DdbTableConfig.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.common;
+
+import java.util.Collection;
+import java.util.Collections;
+
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.experimental.Accessors;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.Tag;
+
+/**
+ * Configurations of a DDB table created by KCL for its internal operations.
+ */
+@Data
+@Accessors(fluent = true)
+@NoArgsConstructor
+public class DdbTableConfig {
+
+    /**
+     * Creates a config whose table name is {@code applicationName + "-" + tableSuffix}.
+     *
+     * @param applicationName name of the KCL application, used as the table name prefix
+     * @param tableSuffix suffix identifying the table's purpose
+     */
+    protected DdbTableConfig(final String applicationName, final String tableSuffix) {
+        this.tableName = applicationName + "-" + tableSuffix;
+    }
+
+    /**
+     * name to use for the DDB table. If null, it will default to
+     * applicationName-tableSuffix. If multiple KCL applications
+     * run in the same account, a unique tableName must be provided.
+     */
+    private String tableName;
+
+    /**
+     * Billing mode used to create the DDB table.
+     */
+    private BillingMode billingMode = BillingMode.PAY_PER_REQUEST;
+
+    /**
+     * read capacity to provision during DDB table creation,
+     * if billing mode is PROVISIONED.
+     */
+    private long readCapacity;
+
+    /**
+     * write capacity to provision during DDB table creation,
+     * if billing mode is PROVISIONED.
+     */
+    private long writeCapacity;
+
+    /**
+     * Flag to enable Point in Time Recovery on the DDB table.
+     */
+    private boolean pointInTimeRecoveryEnabled = false;
+
+    /**
+     * Flag to enable deletion protection on the DDB table.
+     */
+    private boolean deletionProtectionEnabled = false;
+
+    /**
+     * Tags to add to the DDB table. Defaults to an empty, immutable collection.
+     */
+    private Collection<Tag> tags = Collections.emptyList();
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/FutureUtils.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/FutureUtils.java
index 3c104d8da..5615ffc52 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/FutureUtils.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/FutureUtils.java
@@ -15,10 +15,13 @@
package software.amazon.kinesis.common;
import java.time.Duration;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
+import java.util.function.Supplier;
public class FutureUtils {
@@ -31,4 +34,15 @@ public static T resolveOrCancelFuture(Future future, Duration timeout)
throw te;
}
}
+
+    /**
+     * Obtains a future from the supplier, blocks until it completes and returns its result.
+     * When the future fails with a {@link CompletionException} whose cause is a
+     * {@link RuntimeException}, that cause is rethrown directly so callers can catch the
+     * underlying exception type; any other {@link CompletionException} is rethrown as is.
+     *
+     * @param supplier provides the future to wait on
+     * @param <T> result type of the future
+     * @return the value the future completed with
+     * @throws RuntimeException the unwrapped cause of a failed future, when it is a runtime exception
+     * @throws CompletionException when the future failed with a cause that is not a runtime exception
+     */
+    public static <T> T unwrappingFuture(final Supplier<CompletableFuture<T>> supplier) {
+        try {
+            return supplier.get().join();
+        } catch (CompletionException e) {
+            if (e.getCause() instanceof RuntimeException) {
+                throw (RuntimeException) e.getCause();
+            }
+            throw e;
+        }
+    }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/TableConstants.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/StackTraceUtils.java
similarity index 51%
rename from amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/TableConstants.java
rename to amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/StackTraceUtils.java
index 14cb0eb5c..cffd2d6f3 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/TableConstants.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/StackTraceUtils.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2019 Amazon.com, Inc. or its affiliates.
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
@@ -12,18 +12,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package software.amazon.kinesis.common;
-package software.amazon.kinesis.leases.dynamodb;
+/**
+ * Helper for rendering stack trace elements as text.
+ */
+public class StackTraceUtils {
+ /**
+ * Builds a multi-line string from the given stack trace elements. Each element is written
+ * on its own line as a tab, the text {@code at }, the element, and a newline.
+ *
+ * @param stackTrace elements to render
+ * @return the concatenated lines; an empty string when the array has no elements
+ */
+ public static String getPrintableStackTrace(final StackTraceElement[] stackTrace) {
+ final StringBuilder stackTraceString = new StringBuilder();
-import lombok.AccessLevel;
-import lombok.NoArgsConstructor;
+ for (final StackTraceElement traceElement : stackTrace) {
+ stackTraceString.append("\tat ").append(traceElement).append("\n");
+ }
-/**
- * This class is just a holder for initial lease table IOPs units. This class will be removed in a future release.
- */
-@Deprecated
-@NoArgsConstructor(access = AccessLevel.PRIVATE)
-public class TableConstants {
- public static final long DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY = 10L;
- public static final long DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY = 10L;
+ return stackTraceString.toString();
+ }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorConfig.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorConfig.java
index e1835228c..163462593 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorConfig.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorConfig.java
@@ -18,6 +18,7 @@
import lombok.Data;
import lombok.NonNull;
import lombok.experimental.Accessors;
+import software.amazon.kinesis.common.DdbTableConfig;
import software.amazon.kinesis.leases.NoOpShardPrioritization;
import software.amazon.kinesis.leases.ShardPrioritization;
@@ -27,6 +28,14 @@
@Data
@Accessors(fluent = true)
public class CoordinatorConfig {
+
+ private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1;
+
+ /**
+ * Creates a coordinator config for the given application and initialises the
+ * CoordinatorState table config from the same application name.
+ *
+ * @param applicationName name of the KCL application
+ */
+ public CoordinatorConfig(final String applicationName) {
+ this.applicationName = applicationName;
+ this.coordinatorStateTableConfig = new CoordinatorStateTableConfig(applicationName);
+ }
+
/**
* Application name used by checkpointer to checkpoint.
*
@@ -96,4 +105,53 @@ public class CoordinatorConfig {
* Default value: 1000L
*/
private long schedulerInitializationBackoffTimeMillis = 1000L;
+
+ /**
+ * Version the KCL needs to operate in. For more details check the KCLv3 migration
+ * documentation.
+ */
+ public enum ClientVersionConfig {
+ /**
+ * For an application that was operating with previous KCLv2.x, during
+ * upgrade to KCLv3.x, a migration process is needed due to the incompatible
+ * changes between the 2 versions. During the migration process, application
+ * must use {@link #CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X} so that it runs in
+ * a compatible mode until all workers in the cluster have upgraded to the version
+ * running 3.x version (which is determined based on workers emitting WorkerMetricStats).
+ * Once all known workers are in 3.x mode, the library auto toggles to 3.x mode;
+ * but prior to that it runs in a mode compatible with 2.x workers.
+ * This version also allows rolling back to the compatible mode from the
+ * auto-toggled 3.x mode.
+ */
+ CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X,
+ /**
+ * A new application operating with KCLv3.x will use this value. Also, an application
+ * that has successfully upgraded to 3.x version and no longer needs the ability
+ * for a rollback to a 2.x compatible version, will use this value. In this version,
+ * KCL will operate with new algorithms introduced in 3.x which are not compatible
+ * with prior versions. And once in this version, rollback to 2.x is not supported.
+ */
+ CLIENT_VERSION_CONFIG_3X,
+ }
+
+ /**
+ * Client version KCL must operate in, by default it operates in 3.x version which is not
+ * compatible with prior versions.
+ */
+ private ClientVersionConfig clientVersionConfig = ClientVersionConfig.CLIENT_VERSION_CONFIG_3X;
+
+ /**
+ * Table configuration for the CoordinatorState DDB table. Instances are created by
+ * the enclosing class only (private constructor) and use the table suffix
+ * {@code CoordinatorState}.
+ */
+ public static class CoordinatorStateTableConfig extends DdbTableConfig {
+ private CoordinatorStateTableConfig(final String applicationName) {
+ super(applicationName, "CoordinatorState");
+ }
+ }
+
+ /**
+ * Configuration to control how the CoordinatorState DDB table is created, such as table name,
+ * billing mode, provisioned capacity. If no table name is specified, the table name will
+ * default to applicationName-CoordinatorState. If no billing mode is chosen, the default is
+ * On-Demand.
+ */
+ @NonNull
+ private final CoordinatorStateTableConfig coordinatorStateTableConfig;
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorState.java
new file mode 100644
index 000000000..65de6504a
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorState.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator;
+
+import java.util.Map;
+
+import lombok.AccessLevel;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+
+/**
+ * DataModel for CoordinatorState, this data model is used to store various state information required
+ * for coordination across the KCL worker fleet. Therefore, the model follows a flexible schema.
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor(access = AccessLevel.PRIVATE)
+@Slf4j
+@KinesisClientInternalApi
+public class CoordinatorState {
+    /**
+     * Name of the hash key attribute of the CoordinatorState table.
+     */
+    public static final String COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME = "key";
+
+    /**
+     * Key value for the item in the CoordinatorState table used for leader
+     * election among the KCL workers. The attributes relevant to this item
+     * are dictated by the DDB Lock client implementation that is used to
+     * provide mutual exclusion.
+     */
+    public static final String LEADER_HASH_KEY = "Leader";
+
+    /**
+     * Hash key value identifying this state item.
+     */
+    private String key;
+
+    /**
+     * Remaining attributes of the item, keyed by attribute name.
+     */
+    private Map<String, AttributeValue> attributes;
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorStateDAO.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorStateDAO.java
new file mode 100644
index 000000000..0a43ec65e
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorStateDAO.java
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions;
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions.AmazonDynamoDBLockClientOptionsBuilder;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.MapUtils;
+import software.amazon.awssdk.core.waiters.WaiterResponse;
+import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.AttributeAction;
+import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate;
+import software.amazon.awssdk.services.dynamodb.model.BillingMode;
+import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.DynamoDbException;
+import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.GetItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.GetItemResponse;
+import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
+import software.amazon.awssdk.services.dynamodb.model.KeyType;
+import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput;
+import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughputExceededException;
+import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
+import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
+import software.amazon.awssdk.services.dynamodb.model.ScanRequest;
+import software.amazon.awssdk.services.dynamodb.model.ScanResponse;
+import software.amazon.awssdk.services.dynamodb.model.TableDescription;
+import software.amazon.awssdk.services.dynamodb.model.TableStatus;
+import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest;
+import software.amazon.awssdk.services.dynamodb.waiters.DynamoDbAsyncWaiter;
+import software.amazon.awssdk.utils.CollectionUtils;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.common.FutureUtils;
+import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig;
+import software.amazon.kinesis.coordinator.migration.MigrationState;
+import software.amazon.kinesis.leases.DynamoUtils;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
+import software.amazon.kinesis.utils.DdbUtil;
+
+import static java.util.Objects.nonNull;
+import static software.amazon.kinesis.common.FutureUtils.unwrappingFuture;
+import static software.amazon.kinesis.coordinator.CoordinatorState.COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME;
+
+/**
+ * Data Access Object to abstract accessing {@link CoordinatorState} from
+ * the CoordinatorState DDB table.
+ */
+@Slf4j
+@KinesisClientInternalApi
+public class CoordinatorStateDAO {
+ private final DynamoDbAsyncClient dynamoDbAsyncClient;
+ private final DynamoDbClient dynamoDbSyncClient;
+
+ private final CoordinatorStateTableConfig config;
+
+    /**
+     * Builds a DAO bound to the given async client and table configuration. A synchronous
+     * client delegating to the async client is created as well.
+     *
+     * @param dynamoDbAsyncClient client used for all DynamoDB calls
+     * @param config configuration of the CoordinatorState table
+     */
+    public CoordinatorStateDAO(
+            final DynamoDbAsyncClient dynamoDbAsyncClient, final CoordinatorStateTableConfig config) {
+        this.config = config;
+        this.dynamoDbAsyncClient = dynamoDbAsyncClient;
+        // Must follow the dynamoDbAsyncClient assignment: the delegate wraps that field.
+        this.dynamoDbSyncClient = createDelegateClient();
+    }
+
+    /**
+     * Prepares the DAO for use by creating the backing table when it does not exist yet.
+     *
+     * @throws DependencyException propagated from the table creation step
+     */
+    public void initialize() throws DependencyException {
+        this.createTableIfNotExists();
+    }
+
+    /**
+     * Wraps the async client in an adapter exposing the synchronous client interface.
+     *
+     * @return synchronous client backed by {@code dynamoDbAsyncClient}
+     */
+    private DynamoDbClient createDelegateClient() {
+        final DynamoDbClient syncAdapter = new DynamoDbAsyncToSyncClientAdapter(dynamoDbAsyncClient);
+        return syncAdapter;
+    }
+
+    /**
+     * Creates a lock client options builder targeting the CoordinatorState table through the
+     * synchronous delegate client, with the table's hash key attribute as the partition key name.
+     *
+     * @return pre-configured options builder
+     */
+    public AmazonDynamoDBLockClientOptionsBuilder getDDBLockClientOptionsBuilder() {
+        final AmazonDynamoDBLockClientOptionsBuilder optionsBuilder =
+                AmazonDynamoDBLockClientOptions.builder(dynamoDbSyncClient, config.tableName());
+        return optionsBuilder.withPartitionKeyName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME);
+    }
+
+    /**
+     * List all the {@link CoordinatorState} from the DDB table synchronously. The scan is
+     * repeated from the last evaluated key until a response carries no such key.
+     *
+     * @return list of state
+     *
+     * @throws DependencyException if DynamoDB scan fails in an unexpected way
+     * @throws InvalidStateException if ddb table does not exist
+     * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
+     */
+    public List<CoordinatorState> listCoordinatorState()
+            throws ProvisionedThroughputException, DependencyException, InvalidStateException {
+        log.debug("Listing coordinatorState");
+
+        final ScanRequest request =
+                ScanRequest.builder().tableName(config.tableName()).build();
+
+        try {
+            ScanResponse response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(request));
+            final List<CoordinatorState> stateList = new ArrayList<>();
+            // response is set to null once the final page has been consumed.
+            while (Objects.nonNull(response)) {
+                log.debug("Scan response {}", response);
+
+                response.items().stream().map(this::fromDynamoRecord).forEach(stateList::add);
+                if (!CollectionUtils.isNullOrEmpty(response.lastEvaluatedKey())) {
+                    // More pages remain: resume the scan after the last evaluated key.
+                    final ScanRequest continuationRequest = request.toBuilder()
+                            .exclusiveStartKey(response.lastEvaluatedKey())
+                            .build();
+                    log.debug("Scan request {}", continuationRequest);
+                    response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(continuationRequest));
+                } else {
+                    log.debug("Scan finished");
+                    response = null;
+                }
+            }
+            return stateList;
+        } catch (final ProvisionedThroughputExceededException e) {
+            log.warn(
+                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
+                            + " on the table.",
+                    config.tableName());
+            throw new ProvisionedThroughputException(e);
+        } catch (final ResourceNotFoundException e) {
+            throw new InvalidStateException(
+                    String.format("Cannot list coordinatorState, because table %s does not exist", config.tableName()));
+        } catch (final DynamoDbException e) {
+            throw new DependencyException(e);
+        }
+    }
+
+    /**
+     * Writes the given {@link CoordinatorState} to the table unless an item with the same
+     * key is already present.
+     *
+     * @param state the state to create
+     * @return true if state was created, false if it already exists
+     *
+     * @throws DependencyException if DynamoDB put fails in an unexpected way
+     * @throws InvalidStateException if the CoordinatorState table does not exist
+     * @throws ProvisionedThroughputException if DynamoDB put fails due to lack of capacity
+     */
+    public boolean createCoordinatorStateIfNotExists(final CoordinatorState state)
+            throws DependencyException, InvalidStateException, ProvisionedThroughputException {
+        log.debug("Creating coordinatorState {}", state);
+
+        // The put is conditional on the item not existing yet.
+        final PutItemRequest putRequest = PutItemRequest.builder()
+                .tableName(config.tableName())
+                .item(toDynamoRecord(state))
+                .expected(getDynamoNonExistentExpectation())
+                .build();
+
+        try {
+            unwrappingFuture(() -> dynamoDbAsyncClient.putItem(putRequest));
+            log.info("Created CoordinatorState: {}", state);
+            return true;
+        } catch (final ConditionalCheckFailedException e) {
+            log.info("Not creating coordinator state because the key already exists");
+            return false;
+        } catch (final ProvisionedThroughputExceededException e) {
+            log.warn(
+                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
+                            + " on the table.",
+                    config.tableName());
+            throw new ProvisionedThroughputException(e);
+        } catch (final ResourceNotFoundException e) {
+            final String message = String.format(
+                    "Cannot create coordinatorState %s, because table %s does not exist", state, config.tableName());
+            throw new InvalidStateException(message);
+        } catch (final DynamoDbException e) {
+            throw new DependencyException(e);
+        }
+    }
+
+    /**
+     * Reads the {@link CoordinatorState} stored under the given key using a consistent read.
+     *
+     * @param key Get the CoordinatorState for this key
+     *
+     * @return state for the specified key, or null if one doesn't exist
+     *
+     * @throws InvalidStateException if ddb table does not exist
+     * @throws ProvisionedThroughputException if DynamoDB get fails due to lack of capacity
+     * @throws DependencyException if DynamoDB get fails in an unexpected way
+     */
+    public CoordinatorState getCoordinatorState(@NonNull final String key)
+            throws DependencyException, InvalidStateException, ProvisionedThroughputException {
+        log.debug("Getting coordinatorState with key {}", key);
+
+        final GetItemRequest request = GetItemRequest.builder()
+                .tableName(config.tableName())
+                .key(getCoordinatorStateKey(key))
+                .consistentRead(true)
+                .build();
+
+        try {
+            final GetItemResponse result = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.getItem(request));
+
+            final Map<String, AttributeValue> dynamoRecord = result.item();
+            // A null or empty item map means no item is stored under this key.
+            if (CollectionUtils.isNullOrEmpty(dynamoRecord)) {
+                log.debug("No coordinatorState found with key {}, returning null.", key);
+                return null;
+            }
+            return fromDynamoRecord(dynamoRecord);
+        } catch (final ProvisionedThroughputExceededException e) {
+            log.warn(
+                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
+                            + " on the table.",
+                    config.tableName());
+            throw new ProvisionedThroughputException(e);
+        } catch (final ResourceNotFoundException e) {
+            throw new InvalidStateException(String.format(
+                    "Cannot get coordinatorState for key %s, because table %s does not exist",
+                    key, config.tableName()));
+        } catch (final DynamoDbException e) {
+            throw new DependencyException(e);
+        }
+    }
+
+    /**
+     * Update fields of the given coordinator state in DynamoDB. Conditional on the provided expectation,
+     * which is applied in addition to the expectation built from the state's key.
+     *
+     * @param state state whose key selects the item and whose content is written
+     * @param expectations additional conditions for the update; null is treated as no extra conditions
+     * @return true if update succeeded, false otherwise when expectations are not met
+     *
+     * @throws InvalidStateException if table does not exist
+     * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
+     * @throws DependencyException if DynamoDB update fails in an unexpected way
+     */
+    public boolean updateCoordinatorStateWithExpectation(
+            @NonNull final CoordinatorState state, final Map<String, ExpectedAttributeValue> expectations)
+            throws DependencyException, InvalidStateException, ProvisionedThroughputException {
+        final Map<String, ExpectedAttributeValue> expectationMap = getDynamoExistentExpectation(state.getKey());
+        // Caller-supplied expectations are merged on top of the key-based expectation.
+        expectationMap.putAll(MapUtils.emptyIfNull(expectations));
+
+        final Map<String, AttributeValueUpdate> updateMap = getDynamoCoordinatorStateUpdate(state);
+
+        final UpdateItemRequest request = UpdateItemRequest.builder()
+                .tableName(config.tableName())
+                .key(getCoordinatorStateKey(state.getKey()))
+                .expected(expectationMap)
+                .attributeUpdates(updateMap)
+                .build();
+
+        try {
+            FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.updateItem(request));
+        } catch (final ConditionalCheckFailedException e) {
+            log.debug("CoordinatorState update {} failed because conditions were not met", state);
+            return false;
+        } catch (final ProvisionedThroughputExceededException e) {
+            log.warn(
+                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
+                            + " on the table.",
+                    config.tableName());
+            throw new ProvisionedThroughputException(e);
+        } catch (final ResourceNotFoundException e) {
+            throw new InvalidStateException(String.format(
+                    "Cannot update coordinatorState for key %s, because table %s does not exist",
+                    state.getKey(), config.tableName()));
+        } catch (final DynamoDbException e) {
+            throw new DependencyException(e);
+        }
+
+        log.info("Coordinator state updated {}", state);
+        return true;
+    }
+
+    /**
+     * Creates the coordinator state table when {@link #getTableDescription()} reports none, then, if the
+     * table is not yet ACTIVE, waits up to 10 minutes for it to exist and invokes
+     * {@code DdbUtil.pitrEnabler} afterwards.
+     *
+     * @throws DependencyException if the waiter finishes without a successful response
+     */
+    private void createTableIfNotExists() throws DependencyException {
+        TableDescription tableDescription = getTableDescription();
+        if (tableDescription == null) {
+            final CreateTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.createTable(getRequest()));
+            tableDescription = response.tableDescription();
+            log.info("DDB Table: {} created", config.tableName());
+        } else {
+            log.info("Skipping DDB table {} creation as it already exists", config.tableName());
+        }
+
+        if (tableDescription.tableStatus() != TableStatus.ACTIVE) {
+            log.info("Waiting for DDB Table: {} to become active", config.tableName());
+            try (final DynamoDbAsyncWaiter waiter = dynamoDbAsyncClient.waiter()) {
+                // The type argument keeps matched().response() typed, so orElseThrow can surface the
+                // checked DependencyException instead of an untyped Throwable.
+                final WaiterResponse<DescribeTableResponse> response =
+                        unwrappingFuture(() -> waiter.waitUntilTableExists(
+                                r -> r.tableName(config.tableName()), o -> o.waitTimeout(Duration.ofMinutes(10))));
+                response.matched()
+                        .response()
+                        .orElseThrow(() -> new DependencyException(new IllegalStateException(
+                                "Creating CoordinatorState table timed out",
+                                response.matched().exception().orElse(null))));
+            }
+            // Only reached on the path where the table was not ACTIVE when first inspected.
+            unwrappingFuture(() -> DdbUtil.pitrEnabler(config, dynamoDbAsyncClient));
+        }
+    }
+
+    /**
+     * Assembles the CreateTable request for the coordinator state table: a single string hash key,
+     * the configured deletion-protection flag, optional tags and the configured billing mode.
+     */
+    private CreateTableRequest getRequest() {
+        final KeySchemaElement hashKeySchema = KeySchemaElement.builder()
+                .attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME)
+                .keyType(KeyType.HASH)
+                .build();
+        final AttributeDefinition hashKeyDefinition = AttributeDefinition.builder()
+                .attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME)
+                .attributeType(ScalarAttributeType.S)
+                .build();
+
+        final CreateTableRequest.Builder createTable = CreateTableRequest.builder()
+                .tableName(config.tableName())
+                .keySchema(hashKeySchema)
+                .attributeDefinitions(hashKeyDefinition)
+                .deletionProtectionEnabled(config.deletionProtectionEnabled());
+
+        // Tags are only attached when at least one is configured.
+        if (config.tags() != null && !config.tags().isEmpty()) {
+            createTable.tags(config.tags());
+        }
+
+        switch (config.billingMode()) {
+            case PAY_PER_REQUEST:
+                createTable.billingMode(BillingMode.PAY_PER_REQUEST);
+                break;
+            case PROVISIONED:
+                // Provisioned mode additionally carries the configured read/write capacity.
+                createTable.billingMode(BillingMode.PROVISIONED);
+                createTable.provisionedThroughput(ProvisionedThroughput.builder()
+                        .readCapacityUnits(config.readCapacity())
+                        .writeCapacityUnits(config.writeCapacity())
+                        .build());
+                break;
+        }
+        return createTable.build();
+    }
+
+    /**
+     * Builds the single-entry primary key map for the item stored under the given key.
+     *
+     * @param key hash key value of the coordinator state item; must not be null
+     * @return immutable map from the hash key attribute name to the key's attribute value
+     */
+    private Map<String, AttributeValue> getCoordinatorStateKey(@NonNull final String key) {
+        return Collections.singletonMap(
+                COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(key));
+    }
+
+    /**
+     * Converts a DynamoDB item into a {@link CoordinatorState}. The hash key attribute is split off as the
+     * state's key; when {@code MigrationState.deserialize} yields a non-null result that is returned,
+     * otherwise a plain CoordinatorState holding the remaining attributes is built.
+     *
+     * @param dynamoRecord item as returned by DynamoDB; it is copied, not modified
+     * @return the deserialized coordinator state
+     */
+    private CoordinatorState fromDynamoRecord(final Map<String, AttributeValue> dynamoRecord) {
+        // Work on a copy so that removing the key attribute does not touch the caller's map.
+        final HashMap<String, AttributeValue> attributes = new HashMap<>(dynamoRecord);
+        final String keyValue =
+                DynamoUtils.safeGetString(attributes.remove(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME));
+
+        final MigrationState migrationState = MigrationState.deserialize(keyValue, attributes);
+        if (migrationState != null) {
+            log.debug("Retrieved MigrationState {}", migrationState);
+            return migrationState;
+        }
+
+        final CoordinatorState c =
+                CoordinatorState.builder().key(keyValue).attributes(attributes).build();
+        log.debug("Retrieved coordinatorState {}", c);
+
+        return c;
+    }
+
+    /**
+     * Converts a coordinator state into a DynamoDB item: the hash key, then the serialized
+     * {@link MigrationState} fields when applicable, then the state's own attributes (which therefore
+     * win over earlier entries with the same attribute name).
+     *
+     * @param state coordinator state to convert
+     * @return a new mutable map representing the item
+     */
+    private Map<String, AttributeValue> toDynamoRecord(final CoordinatorState state) {
+        final Map<String, AttributeValue> result = new HashMap<>();
+        result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(state.getKey()));
+        if (state instanceof MigrationState) {
+            result.putAll(((MigrationState) state).serialize());
+        }
+        if (!CollectionUtils.isNullOrEmpty(state.getAttributes())) {
+            result.putAll(state.getAttributes());
+        }
+        return result;
+    }
+
+    /**
+     * Builds the expectation that the hash key attribute does not exist.
+     *
+     * @return a new mutable expectation map with a single entry for the hash key attribute
+     */
+    private Map<String, ExpectedAttributeValue> getDynamoNonExistentExpectation() {
+        final Map<String, ExpectedAttributeValue> result = new HashMap<>();
+
+        final ExpectedAttributeValue expectedAV =
+                ExpectedAttributeValue.builder().exists(false).build();
+        result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV);
+
+        return result;
+    }
+
+    /**
+     * Builds the expectation that the hash key attribute holds the given value.
+     *
+     * @param keyValue expected value of the hash key attribute
+     * @return a new mutable expectation map, so callers can add further conditions to it
+     */
+    private Map<String, ExpectedAttributeValue> getDynamoExistentExpectation(final String keyValue) {
+        final Map<String, ExpectedAttributeValue> result = new HashMap<>();
+
+        final ExpectedAttributeValue expectedAV = ExpectedAttributeValue.builder()
+                .value(AttributeValue.fromS(keyValue))
+                .build();
+        result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV);
+
+        return result;
+    }
+
+    /**
+     * Builds the attribute updates for the given state: the {@link MigrationState} specific updates when
+     * applicable, followed by a PUT for every entry of the state's attributes.
+     *
+     * @param state coordinator state to derive the updates from
+     * @return a new mutable map of attribute name to update action
+     */
+    private Map<String, AttributeValueUpdate> getDynamoCoordinatorStateUpdate(final CoordinatorState state) {
+        final HashMap<String, AttributeValueUpdate> updates = new HashMap<>();
+        if (state instanceof MigrationState) {
+            updates.putAll(((MigrationState) state).getDynamoUpdate());
+        }
+        // Same guard as toDynamoRecord: a state without attributes contributes no PUT entries
+        // instead of failing with a NullPointerException.
+        if (!CollectionUtils.isNullOrEmpty(state.getAttributes())) {
+            state.getAttributes()
+                    .forEach((attribute, value) -> updates.put(
+                            attribute,
+                            AttributeValueUpdate.builder()
+                                    .value(value)
+                                    .action(AttributeAction.PUT)
+                                    .build()));
+        }
+        return updates;
+    }
+
+    /**
+     * Describes the configured table.
+     *
+     * @return the table description, or null when DynamoDB reports that the table does not exist
+     */
+    private TableDescription getTableDescription() {
+        final DescribeTableRequest describeRequest =
+                DescribeTableRequest.builder().tableName(config.tableName()).build();
+        try {
+            final DescribeTableResponse described =
+                    unwrappingFuture(() -> dynamoDbAsyncClient.describeTable(describeRequest));
+            return described.table();
+        } catch (final ResourceNotFoundException tableMissing) {
+            // A missing table is reported to the caller as null rather than as an error.
+            return null;
+        }
+    }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDecider.java
index 4c7f25daa..2fb81abc8 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDecider.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDecider.java
@@ -18,6 +18,7 @@
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
+import java.util.Objects;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
@@ -28,12 +29,17 @@
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.awssdk.utils.CollectionUtils;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
+import software.amazon.kinesis.metrics.MetricsFactory;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
/**
* An implementation of the {@code LeaderDecider} to elect leader(s) based on workerId.
@@ -59,6 +65,7 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
private final LeaseRefresher leaseRefresher;
private final int numPeriodicShardSyncWorkers;
private final ScheduledExecutorService leaderElectionThreadPool;
+ private final MetricsFactory metricsFactory;
private volatile Set leaders;
@@ -70,8 +77,14 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
DeterministicShuffleShardSyncLeaderDecider(
LeaseRefresher leaseRefresher,
ScheduledExecutorService leaderElectionThreadPool,
- int numPeriodicShardSyncWorkers) {
- this(leaseRefresher, leaderElectionThreadPool, numPeriodicShardSyncWorkers, new ReentrantReadWriteLock());
+ int numPeriodicShardSyncWorkers,
+ MetricsFactory metricsFactory) {
+ this(
+ leaseRefresher,
+ leaderElectionThreadPool,
+ numPeriodicShardSyncWorkers,
+ new ReentrantReadWriteLock(),
+ metricsFactory);
}
/**
@@ -84,11 +97,13 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
LeaseRefresher leaseRefresher,
ScheduledExecutorService leaderElectionThreadPool,
int numPeriodicShardSyncWorkers,
- ReadWriteLock readWriteLock) {
+ ReadWriteLock readWriteLock,
+ MetricsFactory metricsFactory) {
this.leaseRefresher = leaseRefresher;
this.leaderElectionThreadPool = leaderElectionThreadPool;
this.numPeriodicShardSyncWorkers = numPeriodicShardSyncWorkers;
this.readWriteLock = readWriteLock;
+ this.metricsFactory = metricsFactory;
}
/*
@@ -102,7 +117,7 @@ private void electLeaders() {
List leases = leaseRefresher.listLeases();
List uniqueHosts = leases.stream()
.map(Lease::leaseOwner)
- .filter(owner -> owner != null)
+ .filter(Objects::nonNull)
.distinct()
.sorted()
.collect(Collectors.toList());
@@ -146,8 +161,13 @@ public synchronized Boolean isLeader(String workerId) {
ELECTION_SCHEDULING_INTERVAL_MILLIS,
TimeUnit.MILLISECONDS);
}
-
- return executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId));
+ final boolean response = executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId));
+ final MetricsScope metricsScope =
+ MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
+ metricsScope.addData(
+ METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED);
+ MetricsUtil.endScope(metricsScope);
+ return response;
}
@Override
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamicMigrationComponentsInitializer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamicMigrationComponentsInitializer.java
new file mode 100644
index 000000000..c4aecdda2
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamicMigrationComponentsInitializer.java
@@ -0,0 +1,403 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator;
+
+import java.time.Duration;
+import java.util.Objects;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.TimeUnit;
+import java.util.function.BiFunction;
+import java.util.function.Supplier;
+
+import lombok.AccessLevel;
+import lombok.Builder;
+import lombok.Getter;
+import lombok.experimental.Accessors;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode;
+import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager;
+import software.amazon.kinesis.coordinator.migration.ClientVersion;
+import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider;
+import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider;
+import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
+import software.amazon.kinesis.leases.LeaseRefresher;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.metrics.MetricsFactory;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsReporter;
+
+import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
+import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT;
+import static software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager.DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD;
+
+/**
+ * This class is responsible for initializing the KCL components that support
+ * seamless upgrade from v2.x to v3.x.
+ * During specific versions, it also dynamically switches the functionality
+ * to be either vanilla 3.x or 2.x compatible.
+ *
+ * It is responsible for creating:
+ * 1. LeaderDecider
+ * 2. LAM
+ * 3. WorkerMetricStatsReporter
+ *
+ * It manages initializing the following components at initialization time
+ * 1. workerMetricsDAO and workerMetricsManager
+ * 2. leaderDecider
+ * 3. MigrationAdaptiveLeaseAssignmentModeProvider
+ *
+ * It updates the following components dynamically:
+ * 1. starts/stops LAM
+ * 2. starts/stops WorkerMetricStatsReporter
+ * 3. updates LeaseAssignmentMode to either DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT or WORKER_UTILIZATION_AWARE_ASSIGNMENT
+ * 4. creates GSI (deletion is done by KclMigrationTool)
+ * 5. creates WorkerMetricStats table (deletion is done by KclMigrationTool)
+ * 6. updates LeaderDecider to either DeterministicShuffleShardSyncLeaderDecider or DynamoDBLockBasedLeaderDecider
+ */
+@Slf4j
+@KinesisClientInternalApi
+@ThreadSafe
+@Accessors(fluent = true)
+public final class DynamicMigrationComponentsInitializer {
+    // Maximum time shutdown() waits for each thread pool to terminate before forcing it.
+    private static final long SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS = 60L;
+
+    @Getter
+    private final MetricsFactory metricsFactory;
+
+    @Getter
+    private final LeaseRefresher leaseRefresher;
+
+    private final CoordinatorStateDAO coordinatorStateDAO;
+    // Pool on which the WorkerMetricStatsReporter is scheduled.
+    private final ScheduledExecutorService workerMetricsThreadPool;
+
+    @Getter
+    private final WorkerMetricStatsDAO workerMetricsDAO;
+
+    private final WorkerMetricStatsManager workerMetricsManager;
+    // Pool handed to lamCreator; its lifecycle is managed by this class, not by the LAM.
+    private final ScheduledExecutorService lamThreadPool;
+    // Factories for the components whose concrete implementation depends on the client version.
+    private final BiFunction<ScheduledExecutorService, LeaderDecider, LeaseAssignmentManager> lamCreator;
+    private final Supplier<MigrationAdaptiveLeaderDecider> adaptiveLeaderDeciderCreator;
+    private final Supplier<DeterministicShuffleShardSyncLeaderDecider> deterministicLeaderDeciderCreator;
+    private final Supplier<DynamoDBLockBasedLeaderDecider> ddbLockBasedLeaderDeciderCreator;
+
+    @Getter
+    private final String workerIdentifier;
+
+    private final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig;
+
+    @Getter
+    private final long workerMetricsExpirySeconds;
+
+    private final MigrationAdaptiveLeaseAssignmentModeProvider leaseModeChangeConsumer;
+
+    @Getter
+    private LeaderDecider leaderDecider;
+
+    private LeaseAssignmentManager leaseAssignmentManager;
+    // Non-null only while the worker metrics reporter is scheduled.
+    private ScheduledFuture<?> workerMetricsReporterFuture;
+    private LeaseAssignmentMode currentAssignmentMode;
+    // True when initialized for migration, i.e. the assignment mode and leader decider may change at runtime.
+    private boolean dualMode;
+    private boolean initialized;
+
+    /**
+     * Stores the collaborators and factories; no component is created or started here.
+     * {@code workerMetricsExpirySeconds} is derived as
+     * {@code DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD * workerMetricsReporterFreqInMillis},
+     * converted from milliseconds to whole seconds.
+     */
+    @Builder(access = AccessLevel.PACKAGE)
+    DynamicMigrationComponentsInitializer(
+            final MetricsFactory metricsFactory,
+            final LeaseRefresher leaseRefresher,
+            final CoordinatorStateDAO coordinatorStateDAO,
+            final ScheduledExecutorService workerMetricsThreadPool,
+            final WorkerMetricStatsDAO workerMetricsDAO,
+            final WorkerMetricStatsManager workerMetricsManager,
+            final ScheduledExecutorService lamThreadPool,
+            final BiFunction<ScheduledExecutorService, LeaderDecider, LeaseAssignmentManager> lamCreator,
+            final Supplier<MigrationAdaptiveLeaderDecider> adaptiveLeaderDeciderCreator,
+            final Supplier<DeterministicShuffleShardSyncLeaderDecider> deterministicLeaderDeciderCreator,
+            final Supplier<DynamoDBLockBasedLeaderDecider> ddbLockBasedLeaderDeciderCreator,
+            final String workerIdentifier,
+            final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig,
+            final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider) {
+        this.metricsFactory = metricsFactory;
+        this.leaseRefresher = leaseRefresher;
+        this.coordinatorStateDAO = coordinatorStateDAO;
+        this.workerIdentifier = workerIdentifier;
+        this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig;
+        this.workerMetricsExpirySeconds = Duration.ofMillis(DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD
+                        * workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis())
+                .getSeconds();
+        this.workerMetricsManager = workerMetricsManager;
+        this.workerMetricsDAO = workerMetricsDAO;
+        this.workerMetricsThreadPool = workerMetricsThreadPool;
+        this.lamThreadPool = lamThreadPool;
+        this.lamCreator = lamCreator;
+        this.adaptiveLeaderDeciderCreator = adaptiveLeaderDeciderCreator;
+        this.deterministicLeaderDeciderCreator = deterministicLeaderDeciderCreator;
+        this.ddbLockBasedLeaderDeciderCreator = ddbLockBasedLeaderDeciderCreator;
+        this.leaseModeChangeConsumer = leaseAssignmentModeProvider;
+    }
+
+    /**
+     * One-time set-up: starts worker metrics collection, picks the leader decider and assignment mode for
+     * the starting client version, creates (but does not start) the LAM and initializes the lease
+     * assignment mode provider. Subsequent calls are no-ops.
+     *
+     * @param migrationStateMachineStartingClientVersion client version the migration state machine starts in
+     * @throws DependencyException declared for callers; see the invoked components
+     */
+    public void initialize(final ClientVersion migrationStateMachineStartingClientVersion) throws DependencyException {
+        if (initialized) {
+            log.info("Already initialized, nothing to do");
+            return;
+        }
+
+        // always collect metrics so that when we flip to start reporting we will have accurate historical data.
+        log.info("Start collection of WorkerMetricStats");
+        workerMetricsManager.startManager();
+
+        final boolean startsInMigration = migrationStateMachineStartingClientVersion != ClientVersion.CLIENT_VERSION_3X;
+        if (startsInMigration) {
+            initializeComponentsForMigration(migrationStateMachineStartingClientVersion);
+        } else {
+            initializeComponentsFor3x();
+        }
+        log.info("Initialized dual mode {} current assignment mode {}", dualMode, currentAssignmentMode);
+
+        log.info("Creating LAM");
+        leaseAssignmentManager = lamCreator.apply(lamThreadPool, leaderDecider);
+
+        log.info("Initializing {}", leaseModeChangeConsumer.getClass().getSimpleName());
+        leaseModeChangeConsumer.initialize(dualMode, currentAssignmentMode);
+
+        initialized = true;
+    }
+
+ private void initializeComponentsFor3x() {
+ log.info("Initializing for 3x functionality");
+ dualMode = false;
+ currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
+ log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode);
+ leaderDecider = ddbLockBasedLeaderDeciderCreator.get();
+ log.info("Initializing {}", leaderDecider.getClass().getSimpleName());
+ leaderDecider.initialize();
+ }
+
+    /**
+     * Set-up for migration (dual mode): the adaptive leader decider becomes the leader decider and is
+     * seeded with the DDB-lock-based delegate when starting in CLIENT_VERSION_3X_WITH_ROLLBACK, or with
+     * the deterministic delegate otherwise; the assignment mode is chosen to match.
+     *
+     * @param migrationStateMachineStartingClientVersion client version the migration state machine starts in
+     */
+    private void initializeComponentsForMigration(final ClientVersion migrationStateMachineStartingClientVersion) {
+        log.info("Initializing for migration to 3x");
+        dualMode = true;
+
+        final boolean startsAfterFlip =
+                migrationStateMachineStartingClientVersion == ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
+        currentAssignmentMode =
+                startsAfterFlip ? WORKER_UTILIZATION_AWARE_ASSIGNMENT : DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
+        final LeaderDecider delegate =
+                startsAfterFlip ? ddbLockBasedLeaderDeciderCreator.get() : deterministicLeaderDeciderCreator.get();
+        log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode);
+
+        final MigrationAdaptiveLeaderDecider adaptive = adaptiveLeaderDeciderCreator.get();
+        log.info(
+                "Initializing MigrationAdaptiveLeaderDecider with {}",
+                delegate.getClass().getSimpleName());
+        adaptive.updateLeaderDecider(delegate);
+        this.leaderDecider = adaptive;
+    }
+
+    /**
+     * Stops the LAM, worker metrics reporting and collection (when initialized) and then shuts down both
+     * thread pools, waiting up to {@code SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS} for each before forcing it.
+     * The leader decider is intentionally left running; see the comment below.
+     *
+     * If the calling thread is interrupted while waiting, the affected pool is shut down forcibly and the
+     * interrupt status is restored for the caller.
+     */
+    void shutdown() {
+        log.info("Shutting down components");
+        if (initialized) {
+            log.info("Stopping LAM, LeaderDecider, workerMetrics reporting and collection");
+            leaseAssignmentManager.stop();
+            // leader decider is shut down later when scheduler is doing a final shutdown
+            // since scheduler still accesses the leader decider while shutting down
+            stopWorkerMetricsReporter();
+            workerMetricsManager.stopManager();
+        }
+
+        // lam does not manage lifecycle of its threadpool to easily stop/start dynamically.
+        // once migration code is obsolete (i.e. all 3x functionality is the baseline and no
+        // migration is needed), it can be moved inside lam
+        log.info("Shutting down lamThreadPool and workerMetrics reporter thread pool");
+        lamThreadPool.shutdown();
+        workerMetricsThreadPool.shutdown();
+        try {
+            if (!lamThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
+                lamThreadPool.shutdownNow();
+            }
+        } catch (final InterruptedException e) {
+            // Restore the interrupt status, as the second handler does. The wait below then ends
+            // immediately and the remaining pool is shut down forcibly as well.
+            Thread.currentThread().interrupt();
+            log.warn("Interrupted while waiting for shutdown of LeaseAssignmentManager ThreadPool", e);
+            lamThreadPool.shutdownNow();
+        }
+
+        try {
+            if (!workerMetricsThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
+                workerMetricsThreadPool.shutdownNow();
+            }
+        } catch (final InterruptedException e) {
+            Thread.currentThread().interrupt();
+            log.warn("Interrupted while waiting for shutdown of WorkerMetricStatsManager ThreadPool", e);
+            workerMetricsThreadPool.shutdownNow();
+        }
+    }
+
+ private void startWorkerMetricsReporting() throws DependencyException {
+ if (workerMetricsReporterFuture != null) {
+ log.info("Worker metrics reporting is already running...");
+ return;
+ }
+ log.info("Initializing WorkerMetricStats");
+ this.workerMetricsDAO.initialize();
+ log.info("Starting worker metrics reporter");
+ // Start with a delay for workerStatsManager to capture some values and start reporting.
+ workerMetricsReporterFuture = workerMetricsThreadPool.scheduleAtFixedRate(
+ new WorkerMetricStatsReporter(metricsFactory, workerIdentifier, workerMetricsManager, workerMetricsDAO),
+ workerUtilizationAwareAssignmentConfig.inMemoryWorkerMetricsCaptureFrequencyMillis() * 2L,
+ workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis(),
+ TimeUnit.MILLISECONDS);
+ }
+
+    /**
+     * Cancels the scheduled worker metrics reporter, if any, without interrupting a run in progress,
+     * and clears the handle so that reporting can be started again later.
+     */
+    private void stopWorkerMetricsReporter() {
+        log.info("Stopping worker metrics reporter");
+        if (workerMetricsReporterFuture == null) {
+            return;
+        }
+        workerMetricsReporterFuture.cancel(false);
+        workerMetricsReporterFuture = null;
+    }
+
+    /**
+     * Creates the LeaseOwnerToLeaseKey GSI on the lease table if it is missing.
+     *
+     * @param blockingWait when true, poll every 10 seconds for up to 600 seconds until the index exists;
+     *                     when false, creation is only initiated and this call returns right away
+     * @throws DependencyException if the blocking wait ends without the index becoming available, or if
+     *                             DDB fails unexpectedly when creating the GSI
+     */
+    private void createGsi(final boolean blockingWait) throws DependencyException {
+        log.info("Creating Lease table GSI if it does not exist");
+        // KCLv3.0 always starts with GSI available
+        leaseRefresher.createLeaseOwnerToLeaseKeyIndexIfNotExists();
+
+        if (!blockingWait) {
+            return;
+        }
+
+        log.info("Waiting for Lease table GSI creation");
+        final long pollIntervalSeconds = 10L;
+        final long maxWaitSeconds = 600L;
+        if (!leaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(pollIntervalSeconds, maxWaitSeconds)) {
+            throw new DependencyException(
+                    new IllegalStateException("Creating LeaseOwnerToLeaseKeyIndex on Lease table timed out"));
+        }
+    }
+
+ /**
+ * Initialize KCL with components and configuration to support upgrade from 2x. This can happen
+ * at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X.
+ * Or Dynamically during roll-forward from ClientVersion.CLIENT_VERSION_2X.
+ *
+ * @param fromClientVersion the client version being transitioned from; only logged by this method
+ * @throws DependencyException propagated from GSI creation or from starting worker metrics reporting
+ */
+ public synchronized void initializeClientVersionForUpgradeFrom2x(final ClientVersion fromClientVersion)
+ throws DependencyException {
+ log.info("Initializing KCL components for upgrade from 2x from {}", fromClientVersion);
+
+ // Non-blocking: GSI creation is initiated but not awaited.
+ createGsi(false);
+ startWorkerMetricsReporting();
+ // LAM is not started until the dynamic flip to 3xWithRollback
+ }
+
+    /**
+     * Initialize KCL with components and configuration to run vanilla 3x functionality. This can happen
+     * at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3X, or dynamically
+     * during a new deployment when existing workers are in ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK.
+     *
+     * @param fromClientVersion the client version being transitioned from; components are only started
+     *                          when this is CLIENT_VERSION_INIT
+     * @throws DependencyException propagated from GSI creation or from starting worker metrics reporting
+     */
+    public synchronized void initializeClientVersionFor3x(final ClientVersion fromClientVersion)
+            throws DependencyException {
+        log.info("Initializing KCL components for 3x from {}", fromClientVersion);
+
+        log.info("Initializing LeaseAssignmentManager, DDB-lock-based leader decider, WorkerMetricStats manager"
+                + " and creating the Lease table GSI if it does not exist");
+        if (fromClientVersion != ClientVersion.CLIENT_VERSION_INIT) {
+            // nothing to do for a dynamic transition, e.g. from CLIENT_VERSION_3X_WITH_ROLLBACK.
+            return;
+        }
+
+        // gsi may already exist and be active for migrated application.
+        createGsi(true);
+        startWorkerMetricsReporting();
+        log.info("Starting LAM");
+        leaseAssignmentManager.start();
+    }
+
+    /**
+     * Initialize KCL with components and configuration to run 2x compatible functionality
+     * while allowing roll-forward. This can happen at KCL Worker startup when MigrationStateMachine
+     * starts in ClientVersion.CLIENT_VERSION_2X (after a rollback)
+     * Or Dynamically during rollback from CLIENT_VERSION_UPGRADE_FROM_2X or CLIENT_VERSION_3X_WITH_ROLLBACK.
+     *
+     * @param fromClientVersion the client version being transitioned from
+     * @throws IllegalStateException if, when rolling back from CLIENT_VERSION_3X_WITH_ROLLBACK, the current
+     *                               leader decider is not a MigrationAdaptiveLeaderDecider
+     */
+    public synchronized void initializeClientVersionFor2x(final ClientVersion fromClientVersion) {
+        log.info("Initializing KCL components for rollback to 2x from {}", fromClientVersion);
+
+        final boolean dynamicRollback = fromClientVersion != ClientVersion.CLIENT_VERSION_INIT;
+        if (dynamicRollback) {
+            stopWorkerMetricsReporter();
+            // Migration Tool will delete the lease table LeaseOwner GSI
+            // and WorkerMetricStats table
+        }
+
+        if (fromClientVersion != ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK) {
+            return;
+        }
+
+        // we are rolling back after flip
+        currentAssignmentMode = DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
+        notifyLeaseAssignmentModeChange();
+        log.info("Stopping LAM");
+        leaseAssignmentManager.stop();
+
+        final LeaderDecider deterministicDecider = deterministicLeaderDeciderCreator.get();
+        if (!(this.leaderDecider instanceof MigrationAdaptiveLeaderDecider)) {
+            throw new IllegalStateException(String.format("Unexpected leader decider %s", this.leaderDecider));
+        }
+        log.info(
+                "Updating LeaderDecider to {}", deterministicDecider.getClass().getSimpleName());
+        ((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(deterministicDecider);
+    }
+
+    /**
+     * Initialize KCL with components and configuration to run vanilla 3x functionality
+     * while allowing roll-back to 2x functionality. This can happen at KCL Worker startup
+     * when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK (after the flip)
+     * Or Dynamically during flip from CLIENT_VERSION_UPGRADE_FROM_2X.
+     *
+     * @param fromClientVersion the client version being transitioned from
+     * @throws DependencyException propagated from starting worker metrics reporting
+     */
+    public synchronized void initializeClientVersionFor3xWithRollback(final ClientVersion fromClientVersion)
+            throws DependencyException {
+        log.info("Initializing KCL components for 3x with rollback from {}", fromClientVersion);
+
+        final boolean dynamicFlip = fromClientVersion == ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
+        if (!dynamicFlip) {
+            startWorkerMetricsReporting();
+        } else {
+            // dynamic flip: switch the assignment mode and swap the delegate of the adaptive leader decider
+            currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
+            notifyLeaseAssignmentModeChange();
+            final LeaderDecider lockBasedDecider = ddbLockBasedLeaderDeciderCreator.get();
+            log.info("Updating LeaderDecider to {}", lockBasedDecider.getClass().getSimpleName());
+            ((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(lockBasedDecider);
+        }
+
+        log.info("Starting LAM");
+        leaseAssignmentManager.start();
+    }
+
+    /**
+     * Synchronously hands the current lease assignment mode to the mode-change consumer. Exceptions thrown
+     * by the consumer are logged and not propagated.
+     *
+     * @throws IllegalStateException when not in dual mode, where a mode change is not expected
+     */
+    private void notifyLeaseAssignmentModeChange() {
+        if (!dualMode) {
+            throw new IllegalStateException("Unexpected assignment mode change");
+        }
+
+        log.info("Notifying {} of {}", leaseModeChangeConsumer, currentAssignmentMode);
+        if (leaseModeChangeConsumer == null) {
+            return;
+        }
+        try {
+            leaseModeChangeConsumer.updateLeaseAssignmentMode(currentAssignmentMode);
+        } catch (final Exception consumerFailure) {
+            log.warn("LeaseAssignmentMode change consumer threw exception", consumerFailure);
+        }
+    }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamoDbAsyncToSyncClientAdapter.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamoDbAsyncToSyncClientAdapter.java
new file mode 100644
index 000000000..e9cd24f9c
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamoDbAsyncToSyncClientAdapter.java
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.coordinator;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
+import java.util.function.Supplier;
+
+import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
+import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
+import software.amazon.awssdk.services.dynamodb.model.BatchGetItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.BatchGetItemResponse;
+import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemResponse;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteItemResponse;
+import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DeleteTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
+import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
+import software.amazon.awssdk.services.dynamodb.model.GetItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.GetItemResponse;
+import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.PutItemResponse;
+import software.amazon.awssdk.services.dynamodb.model.QueryRequest;
+import software.amazon.awssdk.services.dynamodb.model.QueryResponse;
+import software.amazon.awssdk.services.dynamodb.model.ScanRequest;
+import software.amazon.awssdk.services.dynamodb.model.ScanResponse;
+import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest;
+import software.amazon.awssdk.services.dynamodb.model.UpdateItemResponse;
+import software.amazon.awssdk.services.dynamodb.paginators.BatchGetItemIterable;
+import software.amazon.awssdk.services.dynamodb.paginators.QueryIterable;
+import software.amazon.awssdk.services.dynamodb.paginators.ScanIterable;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+
+/**
+ * The DDB lock client requires a synchronous DynamoDbClient, but KCL only has a DynamoDbAsyncClient configured.
+ * This adapter implements the synchronous API by delegating to the async client and blocking on each call;
+ * failures are rethrown as the async call's original cause rather than wrapped in CompletionException.
+ */
+@KinesisClientInternalApi
+public class DynamoDbAsyncToSyncClientAdapter implements DynamoDbClient {
+ private final DynamoDbAsyncClient asyncClient;
+
+ public DynamoDbAsyncToSyncClientAdapter(final DynamoDbAsyncClient asyncClient) {
+ this.asyncClient = asyncClient;
+ }
+
+ @Override
+ public String serviceName() {
+ return asyncClient.serviceName();
+ }
+
+ @Override
+ public void close() {
+ asyncClient.close();
+ }
+
+ private <T> T handleException(final Supplier<CompletableFuture<T>> task) {
+ try {
+ return task.get().join(); // block the calling thread until the async call completes
+ } catch (final CompletionException e) {
+ rethrow(e.getCause() == null ? e : e.getCause()); // unwrap; fall back to e when there is no cause
+ return null; // never reached: rethrow always throws, but the compiler cannot know that
+ }
+ }
+
+ @Override
+ public CreateTableResponse createTable(final CreateTableRequest request) {
+ return handleException(() -> asyncClient.createTable(request));
+ }
+
+ @Override
+ public DescribeTableResponse describeTable(final DescribeTableRequest request) {
+ return handleException(() -> asyncClient.describeTable(request));
+ }
+
+ @Override
+ public DeleteTableResponse deleteTable(final DeleteTableRequest request) {
+ return handleException(() -> asyncClient.deleteTable(request));
+ }
+
+ @Override
+ public DeleteItemResponse deleteItem(final DeleteItemRequest request) {
+ return handleException(() -> asyncClient.deleteItem(request));
+ }
+
+ @Override
+ public GetItemResponse getItem(final GetItemRequest request) {
+ return handleException(() -> asyncClient.getItem(request));
+ }
+
+ @Override
+ public PutItemResponse putItem(final PutItemRequest request) {
+ return handleException(() -> asyncClient.putItem(request));
+ }
+
+ @Override
+ public UpdateItemResponse updateItem(final UpdateItemRequest request) {
+ return handleException(() -> asyncClient.updateItem(request));
+ }
+
+ @Override
+ public QueryResponse query(final QueryRequest request) {
+ return handleException(() -> asyncClient.query(request));
+ }
+
+ @Override
+ public ScanResponse scan(final ScanRequest request) {
+ return handleException(() -> asyncClient.scan(request));
+ }
+
+ @Override
+ public QueryIterable queryPaginator(final QueryRequest request) {
+ return new QueryIterable(this, request);
+ }
+
+ @Override
+ public ScanIterable scanPaginator(final ScanRequest request) {
+ return new ScanIterable(this, request);
+ }
+
+ @Override
+ public BatchGetItemResponse batchGetItem(final BatchGetItemRequest request) {
+ return handleException(() -> asyncClient.batchGetItem(request));
+ }
+
+ @Override
+ public BatchWriteItemResponse batchWriteItem(final BatchWriteItemRequest request) {
+ return handleException(() -> asyncClient.batchWriteItem(request));
+ }
+
+ @Override
+ public BatchGetItemIterable batchGetItemPaginator(final BatchGetItemRequest request) {
+ return new BatchGetItemIterable(this, request);
+ }
+
+ private static void rethrow(final Throwable e) {
+ castAndThrow(e); // T is inferred as RuntimeException, so checked causes propagate undeclared
+ }
+
+ @SuppressWarnings("unchecked")
+ private static <T extends Throwable> void castAndThrow(final Throwable e) throws T {
+ throw (T) e; // the cast is erased at runtime, so e is thrown unchanged
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/LeaderDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/LeaderDecider.java
index 140791af6..8b5ae4d34 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/LeaderDecider.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/LeaderDecider.java
@@ -21,6 +21,8 @@
* worker is one of the leaders designated to execute shard-sync and then acts accordingly.
*/
public interface LeaderDecider {
+ String METRIC_OPERATION_LEADER_DECIDER = "LeaderDecider";
+ String METRIC_OPERATION_LEADER_DECIDER_IS_LEADER = METRIC_OPERATION_LEADER_DECIDER + ":IsLeader";
/**
* Method invoked to check the given workerId corresponds to one of the workers
@@ -36,4 +38,23 @@ public interface LeaderDecider {
* being used in the LeaderDecider implementation.
*/
void shutdown();
+
+ /**
+ * Performs initialization tasks for decider if any.
+ */
+ default void initialize() {
+ // No-op by default
+ }
+
+ /**
+ * Releases leadership if the current worker is the leader; otherwise does nothing.
+ * This might not be relevant for some implementations, e.g. DeterministicShuffleShardSyncLeaderDecider does
+ * not have a mechanism to release leadership.
+ *
+ * Even after releasing leadership, the current worker may assume leadership again
+ * in a future election.
+ */
+ default void releaseLeadershipIfHeld() {
+ // No-op by default
+ }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/MigrationAdaptiveLeaseAssignmentModeProvider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/MigrationAdaptiveLeaseAssignmentModeProvider.java
new file mode 100644
index 000000000..ad4124bda
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/MigrationAdaptiveLeaseAssignmentModeProvider.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator;
+
+import lombok.NoArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+
+/**
+ * Supplies the lease assignment mode that KCL must use while an application
+ * migrates from 2.x to 3.x.
+ * KCL v2.x assigns leases with a distributed worker-stealing algorithm
+ * that balances the lease count across workers.
+ * KCL v3.x assigns leases with a centralized lease-assignment algorithm
+ * that balances resource utilization metrics (e.g. CPU utilization) across workers.
+ *
+ * A new application that starts on KCL v3.x needs no migration,
+ * so KCL initializes with the matching lease assignment mode and the mode
+ * never changes dynamically.
+ *
+ * During an upgrade from 2.x to 3.x, the KCL library must be able to
+ * start in the v2.x assignment mode and switch to v3.x assignment dynamically.
+ * In that case both the 2.x and 3.x lease assignment run, but one
+ * of them is a no-op depending on the mode.
+ *
+ * The methods and internal state are guarded against concurrent access so that
+ * both lease assignment algorithms can read the state concurrently while
+ * it may be updated dynamically.
+ */
+@KinesisClientInternalApi
+@Slf4j
+@ThreadSafe
+@NoArgsConstructor
+public final class MigrationAdaptiveLeaseAssignmentModeProvider {
+
+ public enum LeaseAssignmentMode {
+ /**
+ * The 2.x assignment mode.
+ * Leases are assigned based on the number of leases.
+ * Each worker independently determines how many leases to pick up or how many leases to steal
+ * from other workers.
+ */
+ DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT,
+
+ /**
+ * The 3.x assignment mode.
+ * Lease assignment is driven by each worker's resource utilization.
+ * A single worker (the elected leader) performs the assignment by looking at the WorkerMetricStats
+ * of every worker.
+ *
+ * This mode primarily
+ * 1. Starts WorkerMetricStatsManager on the worker, which starts publishing WorkerMetricStats
+ * 2. Starts the LeaseDiscoverer
+ * 3. Creates the LeaseOwnerToLeaseKey GSI on the lease table if it is not already available, and validates
+ * that it is ACTIVE.
+ */
+ WORKER_UTILIZATION_AWARE_ASSIGNMENT;
+ }
+
+ private boolean initialized;
+ private boolean dynamicModeChangeSupportNeeded;
+ private LeaseAssignmentMode currentMode;
+
+ /**
+ * Tells whether both lease assignment algorithms should be initialized so that
+ * the lease assignment mode can be changed dynamically.
+ * @return true if the lease assignment mode can change dynamically,
+ * false otherwise.
+ */
+ public synchronized boolean dynamicModeChangeSupportNeeded() {
+ return dynamicModeChangeSupportNeeded;
+ }
+
+ /**
+ * Returns the lease assignment mode in which KCL should currently perform lease assignment.
+ * @return the current lease assignment mode
+ */
+ public synchronized LeaseAssignmentMode getLeaseAssignmentMode() {
+ if (initialized) {
+ return currentMode;
+ }
+ throw new IllegalStateException("AssignmentMode is not initialized");
+ }
+
+ synchronized void initialize(final boolean dynamicModeChangeSupportNeeded, final LeaseAssignmentMode mode) {
+ if (initialized) {
+ log.info(
+ "Already initialized dynamicModeChangeSupportNeeded {} mode {}. Ignoring new values {}, {}",
+ this.dynamicModeChangeSupportNeeded,
+ this.currentMode,
+ dynamicModeChangeSupportNeeded,
+ mode);
+ return;
+ }
+ log.info("Initializing dynamicModeChangeSupportNeeded {} mode {}", dynamicModeChangeSupportNeeded, mode);
+ this.dynamicModeChangeSupportNeeded = dynamicModeChangeSupportNeeded;
+ this.currentMode = mode;
+ this.initialized = true;
+ }
+
+ synchronized void updateLeaseAssignmentMode(final LeaseAssignmentMode mode) {
+ if (!initialized) {
+ throw new IllegalStateException("Cannot change mode before initializing");
+ }
+ // a mode switch is only legal when dynamic mode change support was requested at initialization
+ if (!dynamicModeChangeSupportNeeded) {
+ throw new IllegalStateException(String.format(
+ "Lease assignment mode already initialized to %s cannot change to %s", this.currentMode, mode));
+ }
+ log.info("Changing Lease assignment mode from {} to {}", currentMode, mode);
+ this.currentMode = mode;
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManager.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManager.java
index f0133ec85..192cf560d 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManager.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManager.java
@@ -87,7 +87,7 @@ class PeriodicShardSyncManager {
private final Map hashRangeHoleTrackerMap = new HashMap<>();
private final String workerId;
- private final LeaderDecider leaderDecider;
+ private LeaderDecider leaderDecider;
private final LeaseRefresher leaseRefresher;
private final Map currentStreamConfigMap;
private final Function shardSyncTaskManagerProvider;
@@ -105,7 +105,6 @@ class PeriodicShardSyncManager {
PeriodicShardSyncManager(
String workerId,
- LeaderDecider leaderDecider,
LeaseRefresher leaseRefresher,
Map currentStreamConfigMap,
Function shardSyncTaskManagerProvider,
@@ -117,7 +116,6 @@ class PeriodicShardSyncManager {
AtomicBoolean leaderSynced) {
this(
workerId,
- leaderDecider,
leaseRefresher,
currentStreamConfigMap,
shardSyncTaskManagerProvider,
@@ -132,7 +130,6 @@ class PeriodicShardSyncManager {
PeriodicShardSyncManager(
String workerId,
- LeaderDecider leaderDecider,
LeaseRefresher leaseRefresher,
Map currentStreamConfigMap,
Function shardSyncTaskManagerProvider,
@@ -144,9 +141,7 @@ class PeriodicShardSyncManager {
int leasesRecoveryAuditorInconsistencyConfidenceThreshold,
AtomicBoolean leaderSynced) {
Validate.notBlank(workerId, "WorkerID is required to initialize PeriodicShardSyncManager.");
- Validate.notNull(leaderDecider, "LeaderDecider is required to initialize PeriodicShardSyncManager.");
this.workerId = workerId;
- this.leaderDecider = leaderDecider;
this.leaseRefresher = leaseRefresher;
this.currentStreamConfigMap = currentStreamConfigMap;
this.shardSyncTaskManagerProvider = shardSyncTaskManagerProvider;
@@ -160,7 +155,9 @@ class PeriodicShardSyncManager {
this.leaderSynced = leaderSynced;
}
- public synchronized TaskResult start() {
+ public synchronized TaskResult start(final LeaderDecider leaderDecider) {
+ Validate.notNull(leaderDecider, "LeaderDecider is required to start PeriodicShardSyncManager.");
+ this.leaderDecider = leaderDecider;
if (!isRunning) {
final Runnable periodicShardSyncer = () -> {
try {
@@ -435,7 +432,7 @@ private List fillWithHashRangesIfRequired(StreamIdentifier streamIdentifi
leaseRefresher.updateLeaseWithMetaInfo(lease, UpdateField.HASH_KEY_RANGE);
} catch (Exception e) {
log.warn(
- "Unable to update hash range key information for lease {} of stream {}."
+ "Unable to update hash range key information for lease {} of stream {}. "
+ "This may result in explicit lease sync.",
lease.leaseKey(),
streamIdentifier);
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/Scheduler.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/Scheduler.java
index 3d2b6c416..0adb69f93 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/Scheduler.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/Scheduler.java
@@ -26,6 +26,7 @@
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletableFuture;
@@ -44,6 +45,7 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
import lombok.AccessLevel;
import lombok.Getter;
@@ -55,15 +57,23 @@
import software.amazon.awssdk.arns.Arn;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.utils.Validate;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.checkpoint.CheckpointConfig;
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
import software.amazon.kinesis.common.StreamConfig;
import software.amazon.kinesis.common.StreamIdentifier;
+import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager;
+import software.amazon.kinesis.coordinator.migration.MigrationStateMachine;
+import software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl;
+import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider;
+import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider;
import software.amazon.kinesis.leases.HierarchicalShardSyncer;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseCleanupManager;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.LeaseManagementConfig;
+import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
+import software.amazon.kinesis.leases.LeaseManagementFactory;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.LeaseSerializer;
import software.amazon.kinesis.leases.MultiStreamLease;
@@ -98,6 +108,9 @@
import software.amazon.kinesis.retrieval.RecordsPublisher;
import software.amazon.kinesis.retrieval.RetrievalConfig;
import software.amazon.kinesis.schemaregistry.SchemaRegistryDecoder;
+import software.amazon.kinesis.worker.WorkerMetricsSelector;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager;
import static software.amazon.kinesis.common.ArnUtil.constructStreamArn;
import static software.amazon.kinesis.processor.FormerStreamsLeasesDeletionStrategy.StreamsLeasesDeletionType;
@@ -109,6 +122,7 @@
@Getter
@Accessors(fluent = true)
@Slf4j
+@KinesisClientInternalApi
public class Scheduler implements Runnable {
private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1;
@@ -157,6 +171,7 @@ public class Scheduler implements Runnable {
private final long taskBackoffTimeMillis;
private final boolean isMultiStreamMode;
private final Map currentStreamConfigMap = new StreamConfigMap();
+
private final StreamTracker streamTracker;
private final FormerStreamsLeasesDeletionStrategy formerStreamsLeasesDeletionStrategy;
private final long listShardsBackoffTimeMillis;
@@ -167,19 +182,29 @@ public class Scheduler implements Runnable {
private final AggregatorUtil aggregatorUtil;
private final Function hierarchicalShardSyncerProvider;
private final long schedulerInitializationBackoffTimeMillis;
- private final LeaderDecider leaderDecider;
+ private LeaderDecider leaderDecider;
private final Map staleStreamDeletionMap = new HashMap<>();
private final LeaseCleanupManager leaseCleanupManager;
private final SchemaRegistryDecoder schemaRegistryDecoder;
private final DeletedStreamListProvider deletedStreamListProvider;
+ @Getter(AccessLevel.NONE)
+ private final MigrationStateMachine migrationStateMachine;
+
+ @Getter(AccessLevel.NONE)
+ private final DynamicMigrationComponentsInitializer migrationComponentsInitializer;
+
+ @Getter(AccessLevel.NONE)
+ private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
+
// Holds consumers for shards the worker is currently tracking. Key is shard
// info, value is ShardConsumer.
private final ConcurrentMap shardInfoShardConsumerMap = new ConcurrentHashMap<>();
private volatile boolean shutdown;
private volatile long shutdownStartTimeMillis;
+
private volatile boolean shutdownComplete = false;
private final Object lock = new Object();
@@ -259,11 +284,32 @@ protected Scheduler(
// Determine leaseSerializer based on availability of MultiStreamTracker.
final LeaseSerializer leaseSerializer =
isMultiStreamMode ? new DynamoDBMultiStreamLeaseSerializer() : new DynamoDBLeaseSerializer();
- this.leaseCoordinator = this.leaseManagementConfig
- .leaseManagementFactory(leaseSerializer, isMultiStreamMode)
- .createLeaseCoordinator(this.metricsFactory);
+
+ final LeaseManagementFactory leaseManagementFactory =
+ this.leaseManagementConfig.leaseManagementFactory(leaseSerializer, isMultiStreamMode);
+ this.leaseCoordinator =
+ leaseManagementFactory.createLeaseCoordinator(this.metricsFactory, shardInfoShardConsumerMap);
this.leaseRefresher = this.leaseCoordinator.leaseRefresher();
+ final CoordinatorStateDAO coordinatorStateDAO = new CoordinatorStateDAO(
+ leaseManagementConfig.dynamoDBClient(), coordinatorConfig().coordinatorStateTableConfig());
+ this.leaseAssignmentModeProvider = new MigrationAdaptiveLeaseAssignmentModeProvider();
+ this.migrationComponentsInitializer = createDynamicMigrationComponentsInitializer(coordinatorStateDAO);
+ this.migrationStateMachine = new MigrationStateMachineImpl(
+ metricsFactory,
+ System::currentTimeMillis,
+ coordinatorStateDAO,
+ Executors.newScheduledThreadPool(
+ 2,
+ new ThreadFactoryBuilder()
+ .setNameFormat("MigrationStateMachine-%04d")
+ .build()),
+ coordinatorConfig.clientVersionConfig(),
+ new Random(),
+ this.migrationComponentsInitializer,
+ leaseManagementConfig.workerIdentifier(),
+ Duration.ofMinutes(10).getSeconds());
+
//
// TODO: Figure out what to do with lease manage <=> checkpoint relationship
//
@@ -280,9 +326,8 @@ protected Scheduler(
this.diagnosticEventFactory = diagnosticEventFactory;
this.diagnosticEventHandler = new DiagnosticEventLogger();
this.deletedStreamListProvider = new DeletedStreamListProvider();
- this.shardSyncTaskManagerProvider = streamConfig -> this.leaseManagementConfig
- .leaseManagementFactory(leaseSerializer, isMultiStreamMode)
- .createShardSyncTaskManager(this.metricsFactory, streamConfig, this.deletedStreamListProvider);
+ this.shardSyncTaskManagerProvider = streamConfig -> leaseManagementFactory.createShardSyncTaskManager(
+ this.metricsFactory, streamConfig, this.deletedStreamListProvider);
this.shardPrioritization = this.coordinatorConfig.shardPrioritization();
this.cleanupLeasesUponShardCompletion = this.leaseManagementConfig.cleanupLeasesUponShardCompletion();
this.skipShardSyncAtWorkerInitializationIfLeasesExist =
@@ -299,8 +344,6 @@ protected Scheduler(
this.workerStateChangeListener =
this.coordinatorConfig.coordinatorFactory().createWorkerStateChangeListener();
}
- this.leaderDecider = new DeterministicShuffleShardSyncLeaderDecider(
- leaseRefresher, Executors.newSingleThreadScheduledExecutor(), PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT);
this.failoverTimeMillis = this.leaseManagementConfig.failoverTimeMillis();
this.taskBackoffTimeMillis = this.lifecycleConfig.taskBackoffTimeMillis();
this.listShardsBackoffTimeMillis = this.retrievalConfig.listShardsBackoffTimeInMillis();
@@ -315,7 +358,6 @@ protected Scheduler(
this.coordinatorConfig.schedulerInitializationBackoffTimeMillis();
this.leaderElectedPeriodicShardSyncManager = new PeriodicShardSyncManager(
leaseManagementConfig.workerIdentifier(),
- leaderDecider,
leaseRefresher,
currentStreamConfigMap,
shardSyncTaskManagerProvider,
@@ -325,14 +367,69 @@ protected Scheduler(
leaseManagementConfig.leasesRecoveryAuditorExecutionFrequencyMillis(),
leaseManagementConfig.leasesRecoveryAuditorInconsistencyConfidenceThreshold(),
leaderSynced);
- this.leaseCleanupManager = this.leaseManagementConfig
- .leaseManagementFactory(leaseSerializer, isMultiStreamMode)
- .createLeaseCleanupManager(metricsFactory);
+ this.leaseCleanupManager = leaseManagementFactory.createLeaseCleanupManager(metricsFactory);
this.schemaRegistryDecoder = this.retrievalConfig.glueSchemaRegistryDeserializer() == null
? null
: new SchemaRegistryDecoder(this.retrievalConfig.glueSchemaRegistryDeserializer());
}
+ /**
+ * Builds the migration components initializer; leaseCoordinator and leaseRefresher must be created first.
+ */
+ private DynamicMigrationComponentsInitializer createDynamicMigrationComponentsInitializer(
+ final CoordinatorStateDAO coordinatorStateDAO) {
+ selectWorkerMetricsIfAvailable(leaseManagementConfig.workerUtilizationAwareAssignmentConfig());
+
+ final WorkerMetricStatsManager workerMetricsManager = new WorkerMetricStatsManager(
+ leaseManagementConfig.workerUtilizationAwareAssignmentConfig().noOfPersistedMetricsPerWorkerMetrics(),
+ leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricList(),
+ metricsFactory,
+ leaseManagementConfig
+ .workerUtilizationAwareAssignmentConfig()
+ .inMemoryWorkerMetricsCaptureFrequencyMillis());
+
+ final WorkerMetricStatsDAO workerMetricsDAO = new WorkerMetricStatsDAO(
+ leaseManagementConfig.dynamoDBClient(),
+ leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsTableConfig(),
+ leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsReporterFreqInMillis());
+
+ return DynamicMigrationComponentsInitializer.builder()
+ .metricsFactory(metricsFactory)
+ .leaseRefresher(leaseRefresher)
+ .coordinatorStateDAO(coordinatorStateDAO)
+ .workerMetricsThreadPool(Executors.newScheduledThreadPool(
+ 1,
+ new ThreadFactoryBuilder()
+ .setNameFormat("worker-metrics-reporter")
+ .build()))
+ .workerMetricsDAO(workerMetricsDAO)
+ .workerMetricsManager(workerMetricsManager)
+ .lamThreadPool(Executors.newScheduledThreadPool(
+ 1,
+ new ThreadFactoryBuilder().setNameFormat("lam-thread").build()))
+ .lamCreator((lamThreadPool, leaderDecider) -> new LeaseAssignmentManager(
+ leaseRefresher,
+ workerMetricsDAO,
+ leaderDecider,
+ leaseManagementConfig.workerUtilizationAwareAssignmentConfig(),
+ leaseCoordinator.workerIdentifier(),
+ leaseManagementConfig.failoverTimeMillis(),
+ metricsFactory,
+ lamThreadPool,
+ System::nanoTime,
+ leaseManagementConfig.maxLeasesForWorker(),
+ leaseManagementConfig.gracefulLeaseHandoffConfig()))
+ .adaptiveLeaderDeciderCreator(() -> new MigrationAdaptiveLeaderDecider(metricsFactory))
+ .deterministicLeaderDeciderCreator(() -> new DeterministicShuffleShardSyncLeaderDecider(
+ leaseRefresher, Executors.newSingleThreadScheduledExecutor(), 1, metricsFactory))
+ .ddbLockBasedLeaderDeciderCreator(() -> DynamoDBLockBasedLeaderDecider.create(
+ coordinatorStateDAO, leaseCoordinator.workerIdentifier(), metricsFactory))
+ .workerIdentifier(leaseCoordinator.workerIdentifier())
+ .workerUtilizationAwareAssignmentConfig(leaseManagementConfig.workerUtilizationAwareAssignmentConfig())
+ .leaseAssignmentModeProvider(leaseAssignmentModeProvider)
+ .build();
+ }
+
/**
* Start consuming data from the stream, and pass it to the application record processors.
*/
@@ -342,13 +439,19 @@ public void run() {
return;
}
+ final MetricsScope metricsScope =
+ MetricsUtil.createMetricsWithOperation(metricsFactory, "Scheduler:Initialize");
+ boolean success = false;
try {
initialize();
+ success = true;
log.info("Initialization complete. Starting worker loop.");
} catch (RuntimeException e) {
log.error("Unable to initialize after {} attempts. Shutting down.", maxInitializationAttempts, e);
workerStateChangeListener.onAllInitializationAttemptsFailed(e);
shutdown();
+ } finally {
+ MetricsUtil.addSuccess(metricsScope, "Initialize", success, MetricsLevel.SUMMARY);
}
while (!shouldShutdown()) {
runProcessLoop();
@@ -363,14 +466,13 @@ void initialize() {
synchronized (lock) {
registerErrorHandlerForUndeliverableAsyncTaskExceptions();
workerStateChangeListener.onWorkerStateChange(WorkerStateChangeListener.WorkerState.INITIALIZING);
+
boolean isDone = false;
Exception lastException = null;
-
for (int i = 0; (!isDone) && (i < maxInitializationAttempts); i++) {
try {
log.info("Initializing LeaseCoordinator attempt {}", (i + 1));
leaseCoordinator.initialize();
-
if (!skipShardSyncAtWorkerInitializationIfLeasesExist || leaseRefresher.isLeaseTableEmpty()) {
if (shouldInitiateLeaseSync()) {
log.info(
@@ -382,21 +484,29 @@ void initialize() {
log.info("Skipping shard sync per configuration setting (and lease table is not empty)");
}
+ // Initialize the state machine after lease table has been initialized
+ // Migration state machine creates and waits for GSI if necessary,
+ // it must be initialized before starting leaseCoordinator, which runs LeaseDiscoverer
+ // and that requires GSI to be present and active. (migrationStateMachine.initialize is idempotent)
+ migrationStateMachine.initialize();
+ leaderDecider = migrationComponentsInitializer.leaderDecider();
+
leaseCleanupManager.start();
// If we reach this point, then we either skipped the lease sync or did not have any exception
// for any of the shard sync in the previous attempt.
+
if (!leaseCoordinator.isRunning()) {
log.info("Starting LeaseCoordinator");
- leaseCoordinator.start();
+ leaseCoordinator.start(leaseAssignmentModeProvider);
} else {
log.info("LeaseCoordinator is already running. No need to start it.");
}
log.info("Scheduling periodicShardSync");
- leaderElectedPeriodicShardSyncManager.start();
+ leaderElectedPeriodicShardSyncManager.start(leaderDecider);
streamSyncWatch.start();
isDone = true;
- } catch (Exception e) {
+ } catch (final Exception e) {
log.error("Caught exception when initializing LeaseCoordinator", e);
lastException = e;
}
@@ -863,7 +973,7 @@ Callable createWorkerShutdownCallable() {
leaseCoordinator, lease, notificationCompleteLatch, shutdownCompleteLatch);
ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease);
ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo);
- if (consumer != null) {
+ if (consumer != null && !consumer.isShutdown()) {
consumer.gracefulShutdown(shutdownNotification);
} else {
//
@@ -912,6 +1022,8 @@ public void shutdown() {
shutdown = true;
shutdownStartTimeMillis = System.currentTimeMillis();
+ migrationStateMachine.shutdown();
+ migrationComponentsInitializer.shutdown();
// Stop lease coordinator, so leases are not renewed or stolen from other workers.
// Lost leases will force Worker to begin shutdown process for all shard consumers in
// Worker.run().
@@ -1228,4 +1340,23 @@ private void resetInfoLogging() {
public Future requestShutdown() {
return null;
}
+
+ /**
+ * Auto-selects the default WorkerMetricStats when none are configured and worker metrics are not disabled.
+ */
+ private void selectWorkerMetricsIfAvailable(
+ final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig) {
+ try {
+ if (!workerUtilizationAwareAssignmentConfig.workerMetricList().isEmpty()
+ || workerUtilizationAwareAssignmentConfig.disableWorkerMetrics()) {
+ return;
+ }
+ workerUtilizationAwareAssignmentConfig.workerMetricList(
+ WorkerMetricsSelector.create().getDefaultWorkerMetrics());
+ } catch (final Exception e) {
+ final String message = "Exception encountered during WorkerMetricStats selection. "
+ + "If this is persistent please try setting the WorkerMetricStats explicitly.";
+ log.warn(message, e);
+ }
+ }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentDecider.java
new file mode 100644
index 000000000..a39866aeb
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentDecider.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.coordinator.assignment;
+
+import java.util.List;
+
+import software.amazon.kinesis.leases.Lease;
+
+/**
+ * Strategy used by the lease assignment flow to decide, in memory, which worker each lease should go to.
+ */
+public interface LeaseAssignmentDecider {
+
+ /**
+ * Assigns expiredOrUnAssignedLeases to the available workers.
+ * NOTE(review): LeaseAssignmentManager treats any lease still present in the list after this call as not
+ * assigned, so implementations are presumably expected to remove the leases they assign - confirm.
+ *
+ * @param expiredOrUnAssignedLeases leases that are expired or have no owner
+ */
+ void assignExpiredOrUnassignedLeases(final List expiredOrUnAssignedLeases);
+
+ /**
+ * Balances the leases between workers in the fleet.
+ * Implementation can choose to balance leases based on lease count or throughput or to bring the variance in
+ * resource utilization to a minimum.
+ * Check documentation on implementation class to see how it balances the leases.
+ */
+ void balanceWorkerVariance();
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentManager.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentManager.java
new file mode 100644
index 000000000..982d06847
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentManager.java
@@ -0,0 +1,735 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.coordinator.assignment;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections.CollectionUtils;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.LeaderDecider;
+import software.amazon.kinesis.leases.Lease;
+import software.amazon.kinesis.leases.LeaseManagementConfig;
+import software.amazon.kinesis.leases.LeaseRefresher;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
+import software.amazon.kinesis.metrics.MetricsFactory;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+import software.amazon.kinesis.metrics.NullMetricsScope;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
+
+import static java.util.Objects.isNull;
+import static java.util.Objects.nonNull;
+
+/**
+ * Performs the lease assignment for the application. Each run first loads the leases and workerMetrics from
+ * storage, then assigns (in-memory) the expired and/or unassigned leases, after which it tries to balance the
+ * load among the workers by re-assigning leases.
+ * In the end, it performs the actual assignment by writing to storage.
+ */
+@Slf4j
+@RequiredArgsConstructor
+@KinesisClientInternalApi
+public final class LeaseAssignmentManager {
+
+ /**
+ * Default number of consecutive failed executions after which leadership is released.
+ */
+ private static final int DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER = 3;
+
+ /**
+ * Default multiplier for the delay between LAM runs with respect to leaseDurationMillis (lease failover millis).
+ * If leaseDurationMillis is 10000 millis, the default delay between LAM runs is 20000 millis.
+ */
+ private static final int DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER = 2;
+
+ /**
+ * Default parallelism factor for scanning the lease table.
+ */
+ private static final int DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR = 10;
+
+ // Name of the metric emitted when leadership is released after repeated assignment failures.
+ private static final String FORCE_LEADER_RELEASE_METRIC_NAME = "ForceLeaderRelease";
+
+ /**
+ * Number of retries (in addition to the first attempt) for loading leases and workers before giving up.
+ */
+ private static final int DDB_LOAD_RETRY_ATTEMPT = 1;
+
+ /**
+ * Internal thread pool used to issue storage calls in parallel (lease assignment, stale WorkerMetricStats
+ * deletion and the parallel lease table scan). Sized to the number of available processors.
+ */
+ private static final ExecutorService LEASE_ASSIGNMENT_CALL_THREAD_POOL =
+ Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+
+ // Operation name of the metrics scope created for each assignment run.
+ private static final String METRICS_LEASE_ASSIGNMENT_MANAGER = "LeaseAssignmentManager";
+ private static final String METRICS_INCOMPLETE_EXPIRED_LEASES_ASSIGNMENT =
+ "LeaseAssignmentManager.IncompleteExpiredLeasesAssignment";
+ // Number of WorkerMetricStats reporting intervals a worker may miss before it is no longer treated as active.
+ public static final int DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD = 2;
+
+ private final LeaseRefresher leaseRefresher;
+ private final WorkerMetricStatsDAO workerMetricsDAO;
+ private final LeaderDecider leaderDecider;
+ private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config;
+ private final String currentWorkerId;
+ private final Long leaseDurationMillis;
+ private final MetricsFactory metricsFactory;
+ // Executor on which the periodic assignment task is scheduled.
+ private final ScheduledExecutorService executorService;
+ // Source of nanosecond timestamps used for lease counter and checkpoint-timeout bookkeeping.
+ private final Supplier nanoTimeProvider;
+ private final int maxLeasesForWorker;
+ private final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig;
+ // False until the first run in which this worker is leader; used to detect a leader switch.
+ private boolean tookOverLeadershipInThisRun = false;
+ // Leases seen in the previous run, keyed by lease key.
+ private final Map prevRunLeasesState = new HashMap<>();
+
+ // Handle of the scheduled assignment task; null while the manager is not running.
+ private Future> managerFuture;
+
+ // Number of consecutive failed assignment runs.
+ private int noOfContinuousFailedAttempts = 0;
+ // Run counter kept within [0, varianceBalancingFrequency); variance balancing runs when it is 0.
+ private int lamRunCounter = 0;
+
+ /**
+ * Schedules the periodic lease assignment task on the executor. The first run starts immediately and each
+ * subsequent run starts leaseDurationMillis * DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER milliseconds
+ * after the previous run finished. If the task is already scheduled this only logs a message.
+ */
+ public synchronized void start() {
+ if (isNull(managerFuture)) {
+ // LAM can be dynamically started/stopped and restarted during MigrationStateMachine execution
+ // so reset the flag to refresh the state before processing during a restart of LAM.
+ tookOverLeadershipInThisRun = false;
+ managerFuture = executorService.scheduleWithFixedDelay(
+ this::performAssignment,
+ 0L,
+ leaseDurationMillis * DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER,
+ TimeUnit.MILLISECONDS);
+ log.info("Started LeaseAssignmentManager");
+ return;
+ }
+ log.info("LeaseAssignmentManager already running...");
+ }
+
+ /**
+ * Cancels the scheduled lease assignment task, interrupting a run that is in progress, and clears the handle
+ * so that {@link #start()} can schedule it again. If the task is not scheduled this only logs a message.
+ */
+ public synchronized void stop() {
+ if (nonNull(managerFuture)) {
+ managerFuture.cancel(true);
+ managerFuture = null;
+ // Log completion only after the task has actually been cancelled.
+ log.info("Completed shutdown of LeaseAssignmentManager");
+ return;
+ }
+ log.info("LeaseAssignmentManager is not running...");
+ }
+
+ /**
+ * Creates the MetricsScope for the given {@code operation} by calling metricsFactory and falls back to
+ * NullMetricsScope if creating the MetricsScope fails.
+ * @param operation Operation name for MetricsScope
+ * @return instance of MetricsScope
+ */
+ private MetricsScope createMetricsScope(final String operation) {
+ try {
+ return MetricsUtil.createMetricsWithOperation(metricsFactory, operation);
+ } catch (final Exception e) {
+ log.error("Failed to create metrics scope defaulting to no metrics.", e);
+ return new NullMetricsScope();
+ }
+ }
+
+ /**
+ * Single lease assignment run; this is the task scheduled by {@link #start()}.
+ * <p>
+ * Does nothing (and reports success) when the current worker is not the leader. On the first run after
+ * becoming leader the previous-run state is reset. Otherwise the run loads leases and WorkerMetricStats,
+ * assigns expired/unassigned leases in memory, balances worker variance when due, writes the new assignments
+ * to storage and finally deletes stale WorkerMetricStats entries.
+ * <p>
+ * Exceptions are caught and logged; after DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER consecutive failed runs
+ * leadership is released. Success and latency metrics are always published and the scope is always ended.
+ */
+ private void performAssignment() {
+
+ final MetricsScope metricsScope = createMetricsScope(METRICS_LEASE_ASSIGNMENT_MANAGER);
+ final long startTime = System.currentTimeMillis();
+ boolean success = false;
+
+ try {
+
+ // If the current worker is not leader, then do nothing as assignment is executed on leader.
+ if (!leaderDecider.isLeader(currentWorkerId)) {
+ log.info("Current worker {} is not a leader, ignore", currentWorkerId);
+ this.tookOverLeadershipInThisRun = false;
+ success = true;
+ return;
+ }
+
+ if (!this.tookOverLeadershipInThisRun) {
+ // This means that there was leader change, perform cleanup of state as this is leader switch.
+ this.tookOverLeadershipInThisRun = true;
+ this.lamRunCounter = 0;
+ prepareAfterLeaderSwitch();
+ }
+ log.info("Current worker {} is a leader, performing assignment", currentWorkerId);
+
+ final InMemoryStorageView inMemoryStorageView = new InMemoryStorageView();
+
+ final long loadStartTime = System.currentTimeMillis();
+ inMemoryStorageView.loadInMemoryStorageView(metricsScope);
+ MetricsUtil.addLatency(metricsScope, "LeaseAndWorkerMetricsLoad", loadStartTime, MetricsLevel.DETAILED);
+
+ publishLeaseAndWorkerCountMetrics(metricsScope, inMemoryStorageView);
+ final LeaseAssignmentDecider leaseAssignmentDecider = new VarianceBasedLeaseAssignmentDecider(
+ inMemoryStorageView,
+ config.dampeningPercentage(),
+ config.reBalanceThresholdPercentage(),
+ config.allowThroughputOvershoot());
+
+ updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout(
+ inMemoryStorageView.getLeaseList(), inMemoryStorageView.getLeaseTableScanTime());
+
+ // This does not include leases whose owning worker has expired (based on the WorkerMetricStats
+ // lastUpdateTime) while the lease itself has not expired (based on the leaseCounter on the lease).
+ // If a worker has died, the lease will be expired and assigned in next iteration.
+ final List expiredOrUnAssignedLeases = inMemoryStorageView.getLeaseList().stream()
+ .filter(lease -> lease.isExpired(
+ TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis),
+ inMemoryStorageView.getLeaseTableScanTime())
+ || Objects.isNull(lease.actualOwner()))
+ // marking them for direct reassignment.
+ .map(l -> l.isExpiredOrUnassigned(true))
+ .collect(Collectors.toList());
+
+ log.info("Total expiredOrUnassignedLeases count : {}", expiredOrUnAssignedLeases.size());
+ metricsScope.addData(
+ "ExpiredLeases", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
+
+ final long expiredAndUnassignedLeaseAssignmentStartTime = System.currentTimeMillis();
+ leaseAssignmentDecider.assignExpiredOrUnassignedLeases(expiredOrUnAssignedLeases);
+ MetricsUtil.addLatency(
+ metricsScope,
+ "AssignExpiredOrUnassignedLeases",
+ expiredAndUnassignedLeaseAssignmentStartTime,
+ MetricsLevel.DETAILED);
+
+ if (!expiredOrUnAssignedLeases.isEmpty()) {
+ // When expiredOrUnAssignedLeases is not empty, that means
+ // that we were not able to assign all expired or unassigned leases and hit the maxThroughput
+ // per worker for all workers.
+ log.warn("Not able to assign all expiredOrUnAssignedLeases");
+ metricsScope.addData(
+ "LeaseSpillover", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
+ }
+
+ if (shouldRunVarianceBalancing()) {
+ final long balanceWorkerVarianceStartTime = System.currentTimeMillis();
+ // Snapshot the count so that only re-assignments made by the balancing step are reported below.
+ final int totalNewAssignmentBeforeWorkerVarianceBalancing =
+ inMemoryStorageView.leaseToNewAssignedWorkerMap.size();
+ leaseAssignmentDecider.balanceWorkerVariance();
+ MetricsUtil.addLatency(
+ metricsScope, "BalanceWorkerVariance", balanceWorkerVarianceStartTime, MetricsLevel.DETAILED);
+ metricsScope.addData(
+ "NumOfLeasesReassignment",
+ inMemoryStorageView.leaseToNewAssignedWorkerMap.size()
+ - totalNewAssignmentBeforeWorkerVarianceBalancing,
+ StandardUnit.COUNT,
+ MetricsLevel.SUMMARY);
+ }
+
+ if (inMemoryStorageView.leaseToNewAssignedWorkerMap.isEmpty()) {
+ log.info("No new lease assignment performed in this iteration");
+ }
+
+ parallelyAssignLeases(inMemoryStorageView, metricsScope);
+ printPerWorkerLeases(inMemoryStorageView);
+ deleteStaleWorkerMetricsEntries(inMemoryStorageView, metricsScope);
+ success = true;
+ noOfContinuousFailedAttempts = 0;
+ } catch (final Exception e) {
+ log.error("LeaseAssignmentManager failed to perform lease assignment.", e);
+ noOfContinuousFailedAttempts++;
+ if (noOfContinuousFailedAttempts >= DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER) {
+ log.error(
+ "Failed to perform assignment {} times in a row, releasing leadership from worker : {}",
+ DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER,
+ currentWorkerId);
+ MetricsUtil.addCount(metricsScope, FORCE_LEADER_RELEASE_METRIC_NAME, 1, MetricsLevel.SUMMARY);
+ leaderDecider.releaseLeadershipIfHeld();
+ }
+ } finally {
+ MetricsUtil.addSuccessAndLatency(metricsScope, success, startTime, MetricsLevel.SUMMARY);
+ MetricsUtil.endScope(metricsScope);
+ }
+ }
+
+ /**
+ * Reports whether variance balancing is due in the current run and advances the run counter.
+ *
+ * @return true when lamRunCounter was 0 before this call, i.e. once every varianceBalancingFrequency runs
+ */
+ private boolean shouldRunVarianceBalancing() {
+ final boolean response = this.lamRunCounter == 0;
+ /*
+ To keep lamRunCounter from growing large, keep it within [0,varianceBalancingFrequency).
+ If varianceBalancingFrequency is 5, lamRunCounter will be within 0 to 4 and the method returns true when
+ lamRunCounter is 0.
+ */
+ this.lamRunCounter = (this.lamRunCounter + 1) % config.varianceBalancingFrequency();
+ return response;
+ }
+
+ /**
+ * Deletes the WorkerMetricStats entries which are stale (not updated for a long time, ref
+ * {@link LeaseAssignmentManager#isWorkerMetricsEntryStale} for the condition to evaluate staleness).
+ * The deletes are issued in parallel on LEASE_ASSIGNMENT_CALL_THREAD_POOL and this method waits for all of
+ * them to finish. The cleanup latency is published whether or not the deletes succeed.
+ *
+ * @param inMemoryStorageView view whose workerMetricsList is checked for stale entries
+ * @param metricsScope scope that receives the stale-entry count and cleanup latency
+ */
+ private void deleteStaleWorkerMetricsEntries(
+ final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) {
+ final long startTime = System.currentTimeMillis();
+ try {
+ final List staleWorkerMetricsList = inMemoryStorageView.getWorkerMetricsList().stream()
+ .filter(this::isWorkerMetricsEntryStale)
+ .collect(Collectors.toList());
+ MetricsUtil.addCount(
+ metricsScope, "TotalStaleWorkerMetricsEntry", staleWorkerMetricsList.size(), MetricsLevel.DETAILED);
+ log.info("Number of stale workerMetrics entries : {}", staleWorkerMetricsList.size());
+ log.info("Stale workerMetrics list : {}", staleWorkerMetricsList);
+
+ final List> completableFutures = staleWorkerMetricsList.stream()
+ .map(workerMetrics -> CompletableFuture.supplyAsync(
+ () -> workerMetricsDAO.deleteMetrics(workerMetrics), LEASE_ASSIGNMENT_CALL_THREAD_POOL))
+ .collect(Collectors.toList());
+
+ // Block until every delete has completed; join() rethrows a failure of any of them.
+ CompletableFuture.allOf(completableFutures.toArray(new CompletableFuture[0]))
+ .join();
+ } finally {
+ MetricsUtil.addLatency(metricsScope, "StaleWorkerMetricsCleanup", startTime, MetricsLevel.DETAILED);
+ }
+ }
+
+ /**
+ * WorkerMetricStats entry is considered stale if the time elapsed between its lastUpdateTime (read as epoch
+ * seconds) and now is greater than config.staleWorkerMetricsEntryCleanupDuration().
+ *
+ * @param workerMetrics entry to evaluate
+ * @return true if the entry is stale
+ */
+ private boolean isWorkerMetricsEntryStale(final WorkerMetricStats workerMetrics) {
+ return Duration.between(Instant.ofEpochSecond(workerMetrics.getLastUpdateTime()), Instant.now())
+ .toMillis()
+ > config.staleWorkerMetricsEntryCleanupDuration().toMillis();
+ }
+
+ /**
+ * Logs, for every active worker, the number of leases and the total throughput assigned to it in the
+ * in-memory view. A worker without an entry in the worker-to-leases map is reported with 0 leases.
+ *
+ * @param storageView in-memory view to report on
+ */
+ private void printPerWorkerLeases(final InMemoryStorageView storageView) {
+ storageView.getActiveWorkerIdSet().forEach(activeWorkerId -> {
+ log.info(
+ "Worker : {} and total leases : {} and totalThroughput : {}",
+ activeWorkerId,
+ Optional.ofNullable(storageView.getWorkerToLeasesMap().get(activeWorkerId))
+ // Type-safe empty set instead of the raw Collections.EMPTY_SET constant.
+ .orElse(Collections.emptySet())
+ .size(),
+ storageView.getWorkerToTotalAssignedThroughputMap().get(activeWorkerId));
+ });
+ }
+
+ /**
+ * Writes the new assignments captured in the in-memory view to storage, one asynchronous task per lease on
+ * LEASE_ASSIGNMENT_CALL_THREAD_POOL, and waits for all tasks to finish. Leases blocked on a pending checkpoint
+ * are skipped. A lease goes through graceful handoff when that feature is enabled and the lease is eligible,
+ * otherwise through regular assignment.
+ * The failed-assignment count and the success/latency of the whole step are published in all cases.
+ *
+ * @param inMemoryStorageView view holding the lease to new owner assignments of this run
+ * @param metricsScope scope that receives the metrics of this step
+ */
+ private void parallelyAssignLeases(final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) {
+ final AtomicInteger failedAssignmentCounter = new AtomicInteger(0);
+ final long startTime = System.currentTimeMillis();
+ boolean success = false;
+ try {
+ CompletableFuture.allOf(inMemoryStorageView.getLeaseToNewAssignedWorkerMap().entrySet().stream()
+ // ignore leases that are heartbeating and pending graceful shutdown checkpoint.
+ .filter(entry -> !entry.getKey().blockedOnPendingCheckpoint(getNanoTimeMillis()))
+ .map(entry -> CompletableFuture.supplyAsync(
+ () -> {
+ try {
+ final Lease lease = entry.getKey();
+ if (gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled()
+ && lease.isEligibleForGracefulShutdown()) {
+ return handleGracefulLeaseHandoff(
+ lease, entry.getValue(), failedAssignmentCounter);
+ } else {
+ return handleRegularLeaseAssignment(
+ lease, entry.getValue(), failedAssignmentCounter);
+ }
+ } catch (Exception e) {
+ // Checked exceptions cannot escape the supplier; wrap so that join() surfaces them.
+ throw new CompletionException(e);
+ }
+ },
+ LEASE_ASSIGNMENT_CALL_THREAD_POOL))
+ .toArray(CompletableFuture[]::new))
+ .join();
+ success = true;
+ } finally {
+ MetricsUtil.addCount(
+ metricsScope, "FailedAssignmentCount", failedAssignmentCounter.get(), MetricsLevel.DETAILED);
+ MetricsUtil.addSuccessAndLatency(
+ metricsScope, "ParallelyAssignLeases", success, startTime, MetricsLevel.DETAILED);
+ }
+ }
+
+ /**
+ * Initiates a graceful handoff of the lease to newOwner through the leaseRefresher. On success the lease gets
+ * a fresh checkpoint-owner timeout; on failure the failed-assignment counter is incremented.
+ *
+ * @param lease lease to hand off
+ * @param newOwner worker the lease is being handed to
+ * @param failedAssignmentCounter counter incremented when the handoff is not initiated
+ * @return the result returned by leaseRefresher.initiateGracefulLeaseHandoff
+ */
+ private boolean handleGracefulLeaseHandoff(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter)
+ throws ProvisionedThroughputException, InvalidStateException, DependencyException {
+ final boolean response = leaseRefresher.initiateGracefulLeaseHandoff(lease, newOwner);
+ if (response) {
+ // new handoff assignment. add the timeout.
+ lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis());
+ } else {
+ failedAssignmentCounter.incrementAndGet();
+ }
+ return response;
+ }
+
+ /**
+ * Assigns the lease to newOwner through the leaseRefresher. On success the lease's lastCounterIncrementNanos
+ * is set to the current nanoTimeProvider value; on failure the failed-assignment counter is incremented.
+ *
+ * @param lease lease to assign
+ * @param newOwner worker the lease is being assigned to
+ * @param failedAssignmentCounter counter incremented when the assignment does not succeed
+ * @return the result returned by leaseRefresher.assignLease
+ */
+ private boolean handleRegularLeaseAssignment(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter)
+ throws ProvisionedThroughputException, InvalidStateException, DependencyException {
+ final boolean response = leaseRefresher.assignLease(lease, newOwner);
+ if (response) {
+ // Successful assignment updates the leaseCounter, update the nanoTime for counter update.
+ lease.lastCounterIncrementNanos(nanoTimeProvider.get());
+ } else {
+ failedAssignmentCounter.incrementAndGet();
+ }
+ return response;
+ }
+
+ /**
+ * Publishes the total number of leases and the number of active workers of the in-memory view as
+ * SUMMARY-level count metrics.
+ */
+ private void publishLeaseAndWorkerCountMetrics(
+ final MetricsScope metricsScope, final InMemoryStorageView inMemoryStorageView) {
+ // Names of the metrics are kept in sync with what is published in LeaseTaker.
+ metricsScope.addData(
+ "TotalLeases", inMemoryStorageView.leaseList.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
+ metricsScope.addData(
+ "NumWorkers", inMemoryStorageView.activeWorkerMetrics.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
+ }
+
+ // For every lease, sets lastCounterIncrementNanos to scanTime if the lease is new to this leader or its
+ // leaseCounter increased since the last run, otherwise carries over the previous value. Also carries over or
+ // (re)starts the graceful-shutdown timeout for leases with a shutdown requested, and finally replaces
+ // prevRunLeasesState with the current lease list.
+ private void updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout(
+ final List leaseList, final Long scanTime) {
+ for (final Lease lease : leaseList) {
+ final Lease prevLease = prevRunLeasesState.get(lease.leaseKey());
+
+ // make sure lease shutdown timeouts are tracked.
+ if (lease.shutdownRequested()) {
+ // The previous and current leases might have the same next and checkpoint owners, but there is no
+ // guarantee that the latest shutdown is the same shutdown as in the previous lease; for example,
+ // another leader may have changed the lease state while this worker was waiting for its LAM run.
+ // This is the best effort to prevent marking the incorrect timeout.
+ if (isNull(prevLease) || !prevLease.shutdownRequested() || !isSameOwners(lease, prevLease)) {
+ // Add new value if previous is null, previous lease is not shutdown pending or the owners
+ // don't match
+ lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis());
+ } else {
+ lease.checkpointOwnerTimeoutTimestampMillis(prevLease.checkpointOwnerTimeoutTimestampMillis());
+ }
+ }
+
+ if (isNull(prevLease)) {
+ lease.lastCounterIncrementNanos(
+ isNull(lease.actualOwner())
+ // This is an unassigned lease, mark as 0L that puts this in first in assignment order
+ ? 0L
+ : scanTime);
+ } else {
+ lease.lastCounterIncrementNanos(
+ lease.leaseCounter() > prevLease.leaseCounter()
+ ? scanTime
+ : prevLease.lastCounterIncrementNanos());
+ }
+ }
+ prevRunLeasesState.clear();
+ prevRunLeasesState.putAll(leaseList.stream().collect(Collectors.toMap(Lease::leaseKey, Function.identity())));
+ }
+
+ /**
+ * Resets the state carried between runs (previous lease snapshot and consecutive-failure count); called on
+ * the first run after this worker becomes leader.
+ */
+ private void prepareAfterLeaderSwitch() {
+ prevRunLeasesState.clear();
+ noOfContinuousFailedAttempts = 0;
+ }
+
+ /**
+ * In memory view of the leases and workerMetrics.
+ * This class supports queries (e.g., leases assigned to worker or total throughout assigned to worker).
+ */
+ @Getter
+ class InMemoryStorageView {
+
+ // This is the in-memory view of the worker to leases mapping; it is updated in memory before the actual
+ // changes are written to storage.
+ private final Map> workerToLeasesMap = new HashMap<>();
+ /**
+ * Total throughput assigned to each worker. This is computed initially after loading the leases and then
+ * updated when {@link InMemoryStorageView#performLeaseAssignment} is called.
+ */
+ private final Map workerToTotalAssignedThroughputMap = new HashMap<>();
+ /**
+ * Captures the new assignments done during the lifecycle of a single run.
+ */
+ private final Map leaseToNewAssignedWorkerMap = new HashMap<>();
+
+ /**
+ * List of all leases in the application.
+ */
+ private List leaseList;
+ /**
+ * List of workers which are active, i.e. whose metric stats were updated at or after the threshold (ref
+ * {@link InMemoryStorageView#computeWorkerExpiryThresholdInSecond}) and which have no failing worker metric.
+ */
+ private List activeWorkerMetrics;
+ /**
+ * List of all valid workerMetrics entries from storage.
+ */
+ private List workerMetricsList;
+ /**
+ * Set of active worker ids.
+ */
+ private Set activeWorkerIdSet;
+ /**
+ * Value of nanoTimeProvider, in nanoseconds, captured once the lease table scan was completed.
+ * NOTE(review): other comments in this file state that nanoTimeProvider is not wall clock time - confirm.
+ */
+ private long leaseTableScanTime = 0L;
+ /**
+ * Target throughput per active worker: average lease throughput multiplied by the number of leases and
+ * divided by the number of active workers (at least 1).
+ */
+ private double targetAverageThroughput;
+
+ /**
+ * Updates {@code workerToLeasesMap} and {@code workerToTotalAssignedThroughputMap} with the change in
+ * ownership and records the new owner in {@code leaseToNewAssignedWorkerMap}. Only the in-memory view is
+ * changed; nothing is written to storage here.
+ *
+ * @param lease lease changing assignment
+ * @param newOwner new owner of the lease
+ */
+ public void performLeaseAssignment(final Lease lease, final String newOwner) {
+ final String existingOwner = lease.actualOwner();
+ workerToLeasesMap.get(existingOwner).remove(lease);
+ workerToLeasesMap
+ .computeIfAbsent(newOwner, owner -> new HashSet<>())
+ .add(lease);
+ updateWorkerThroughput(newOwner, lease.throughputKBps());
+ // Remove the same lease throughput from oldOwner
+ updateWorkerThroughput(existingOwner, -lease.throughputKBps());
+ leaseToNewAssignedWorkerMap.put(lease, newOwner);
+ }
+
+ /**
+ * Scans the LeaseTable and WorkerMetricStats in parallel, loads the data and populates the data structures
+ * used in lease assignment: the valid and the active WorkerMetricStats lists, the lease list, the per-worker
+ * lease and throughput maps, the target average throughput and the set of active worker ids.
+ * Invalid WorkerMetricStats entries and leases that failed deserialization are logged at WARN level and
+ * counted in metrics only when there are any.
+ *
+ * @param metricsScope scope that receives the counts of invalid/failing entries
+ * @throws Exception if loading the leases or the WorkerMetricStats fails
+ */
+ public void loadInMemoryStorageView(final MetricsScope metricsScope) throws Exception {
+ final CompletableFuture, List>> leaseListFuture = loadLeaseListAsync();
+
+ final CompletableFuture> workerMetricsFuture = loadWorkerMetricStats();
+
+ final List workerMetricsFromStorage = workerMetricsFuture.join();
+
+ final List listOfWorkerIdOfInvalidWorkerMetricsEntry = workerMetricsFromStorage.stream()
+ .filter(workerMetrics -> !workerMetrics.isValidWorkerMetric())
+ .map(WorkerMetricStats::getWorkerId)
+ .collect(Collectors.toList());
+ if (!listOfWorkerIdOfInvalidWorkerMetricsEntry.isEmpty()) {
+ // Warn only when there is something to report, to avoid a WARN line on every run.
+ log.warn("List of workerIds with invalid entries : {}", listOfWorkerIdOfInvalidWorkerMetricsEntry);
+ metricsScope.addData(
+ "NumWorkersWithInvalidEntry",
+ listOfWorkerIdOfInvalidWorkerMetricsEntry.size(),
+ StandardUnit.COUNT,
+ MetricsLevel.SUMMARY);
+ }
+
+ // Valid entries are considered further, for validity of entry refer WorkerMetricStats#isValidWorkerMetrics
+ this.workerMetricsList = workerMetricsFromStorage.stream()
+ .filter(WorkerMetricStats::isValidWorkerMetric)
+ .collect(Collectors.toList());
+
+ log.info("Total WorkerMetricStats available : {}", workerMetricsList.size());
+ final long workerExpiryThreshold = computeWorkerExpiryThresholdInSecond();
+
+ final long countOfWorkersWithFailingWorkerMetric = workerMetricsList.stream()
+ .filter(WorkerMetricStats::isAnyWorkerMetricFailing)
+ .count();
+ if (countOfWorkersWithFailingWorkerMetric != 0) {
+ metricsScope.addData(
+ "NumWorkersWithFailingWorkerMetric",
+ countOfWorkersWithFailingWorkerMetric,
+ StandardUnit.COUNT,
+ MetricsLevel.SUMMARY);
+ }
+
+ final Map.Entry, List> leaseListResponse = leaseListFuture.join();
+ this.leaseList = leaseListResponse.getKey();
+ if (!leaseListResponse.getValue().isEmpty()) {
+ // Warn only when there is something to report, to avoid a WARN line on every run.
+ log.warn("Leases that failed deserialization : {}", leaseListResponse.getValue());
+ MetricsUtil.addCount(
+ metricsScope,
+ "LeaseDeserializationFailureCount",
+ leaseListResponse.getValue().size(),
+ MetricsLevel.SUMMARY);
+ }
+ this.leaseTableScanTime = nanoTimeProvider.get();
+ log.info("Total Leases available : {}", leaseList.size());
+
+ final double averageLeaseThroughput = leaseList.stream()
+ .filter(lease -> nonNull(lease.throughputKBps()))
+ .mapToDouble(Lease::throughputKBps)
+ .average()
+ // If none of the leases has any value, that means its app
+ // startup time and thus assigns 0 in that case to start with.
+ .orElse(0D);
+ /*
+ * If a workerMetrics has a failing metric (i.e. has -1 value in last index which denotes failure),
+ * skip it from activeWorkerMetrics and no new action on it will be done
+ * (new assignment etc.) until the metric has non -1 value in last index. This is to avoid performing action
+ * with the stale data on worker.
+ */
+ this.activeWorkerMetrics = workerMetricsList.stream()
+ .filter(workerMetrics -> workerMetrics.getLastUpdateTime() >= workerExpiryThreshold
+ && !workerMetrics.isAnyWorkerMetricFailing())
+ .collect(Collectors.toList());
+ log.info("activeWorkerMetrics : {}", activeWorkerMetrics.size());
+ targetAverageThroughput =
+ averageLeaseThroughput * leaseList.size() / Math.max(1, activeWorkerMetrics.size());
+ leaseList.forEach(lease -> {
+ if (isNull(lease.throughputKBps())) {
+ // If the lease is unassigned, it will not have any throughput value, use average throughput
+ // as good enough value to start with.
+ lease.throughputKBps(averageLeaseThroughput);
+ }
+ workerToLeasesMap
+ .computeIfAbsent(lease.actualOwner(), workerId -> new HashSet<>())
+ .add(lease);
+ updateWorkerThroughput(lease.actualOwner(), lease.throughputKBps());
+ });
+
+ this.activeWorkerIdSet = new HashSet<>();
+ // Calculate initial ratio
+ this.activeWorkerMetrics.forEach(workerMetrics -> {
+ activeWorkerIdSet.add(workerMetrics.getWorkerId());
+ workerMetrics.setEmaAlpha(config.workerMetricsEMAAlpha());
+ if (workerMetrics.isUsingDefaultWorkerMetric()) {
+ // NOTE(review): when targetAverageThroughput is 0 (no lease reports throughput yet) this
+ // division is 0.0 / 0.0 and yields NaN - confirm that downstream code tolerates it.
+ setOperatingRangeAndWorkerMetricsDataForDefaultWorker(
+ workerMetrics,
+ getTotalAssignedThroughput(workerMetrics.getWorkerId()) / targetAverageThroughput);
+ }
+ });
+ }
+
+ /**
+ * Adds leaseThroughput (which may be negative) to the running throughput total of the given worker,
+ * starting from zero for a worker that has no total yet.
+ *
+ * @param workerId worker whose total is adjusted
+ * @param leaseThroughput throughput to add to the worker's total
+ */
+ private void updateWorkerThroughput(final String workerId, final double leaseThroughput) {
+ // Seed an unseen worker with zero before accumulating.
+ workerToTotalAssignedThroughputMap.putIfAbsent(workerId, (double) 0L);
+ final double runningTotal = workerToTotalAssignedThroughputMap.get(workerId);
+ workerToTotalAssignedThroughputMap.put(workerId, runningTotal + leaseThroughput);
+ }
+
+ /**
+ * Overwrites the operating range and metric stats of a worker that uses the default WorkerMetricStats with a
+ * single synthetic metric "T": operating range 100 and two data points equal to ratio * 100.
+ *
+ * @param workerMetrics entry of the worker using the default WorkerMetricStats
+ * @param ratio worker's total assigned throughput divided by the target average throughput
+ */
+ private void setOperatingRangeAndWorkerMetricsDataForDefaultWorker(
+ final WorkerMetricStats workerMetrics, final Double ratio) {
+ // for workers with default WorkerMetricStats, the operating range ceiling of 100 represents the
+ // target throughput. This way, with either heterogeneous or homogeneous fleets
+ // of explicit WorkerMetricStats and default WorkerMetricStats applications, load will be evenly
+ // distributed.
+ log.info(
+ "Worker [{}] is using default WorkerMetricStats, setting initial utilization ratio to [{}].",
+ workerMetrics.getWorkerId(),
+ ratio);
+ workerMetrics.setOperatingRange(ImmutableMap.of("T", ImmutableList.of(100L)));
+ workerMetrics.setMetricStats(ImmutableMap.of("T", ImmutableList.of(ratio * 100, ratio * 100)));
+ }
+
+ /**
+ * Calculates the threshold, in epoch seconds, for a worker to be considered as active: the current wall
+ * clock time minus DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD reporting intervals
+ * (config.workerMetricsReporterFreqInMillis()).
+ * If a worker has not updated the WorkerMetricStats entry at or after this threshold, the worker is not
+ * considered as active.
+ *
+ * @return threshold as wall clock time in seconds since the epoch
+ */
+ private long computeWorkerExpiryThresholdInSecond() {
+ final long timeInSeconds = Duration.ofMillis(System.currentTimeMillis()
+ - DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD
+ * config.workerMetricsReporterFreqInMillis())
+ .getSeconds();
+ log.info("WorkerMetricStats expiry time in seconds : {}", timeInSeconds);
+ return timeInSeconds;
+ }
+
+ /**
+ * Looks at the in-memory total assigned throughput of the worker (which includes any new assignment made in
+ * this run) and figures out if there is room for more.
+ *
+ * @return true if the worker's total assigned throughput is less than or equal to
+ * config.maxThroughputPerHostKBps(); note that the comparison is inclusive
+ */
+ public boolean isWorkerTotalThroughputLessThanMaxThroughput(final String workerId) {
+ return getTotalAssignedThroughput(workerId) <= config.maxThroughputPerHostKBps();
+ }
+
+ /**
+ * Looks at workerToLeasesMap for the lease assignment of a worker and returns true if the worker has
+ * no leases assigned or fewer than maxLeasesForWorker, else false.
+ */
+ public boolean isWorkerAssignedLeasesLessThanMaxLeases(final String workerId) {
+ final Set assignedLeases = workerToLeasesMap.get(workerId);
+ if (CollectionUtils.isEmpty(assignedLeases)) {
+ // There are no leases assigned to the worker, that means it is below maxLeasesForWorker.
+ return true;
+ } else {
+ return assignedLeases.size() < maxLeasesForWorker;
+ }
+ }
+
+ /**
+ * Returns the total throughput currently assigned to the worker in the in-memory view, or 0 if the worker
+ * has no entry.
+ */
+ public Double getTotalAssignedThroughput(final String workerId) {
+ return workerToTotalAssignedThroughputMap.getOrDefault(workerId, 0D);
+ }
+
+ /**
+ * Asynchronously loads all WorkerMetricStats entries from the DAO, with retry (see loadWithRetry).
+ * No executor is passed, so the task runs on CompletableFuture's default asynchronous executor.
+ */
+ private CompletableFuture> loadWorkerMetricStats() {
+ return CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricsDAO::getAllWorkerMetricStats));
+ }
+
+ /**
+ * Asynchronously lists the leases through leaseRefresher.listLeasesParallely, with retry (see loadWithRetry).
+ * The outer task runs on CompletableFuture's default asynchronous executor, while
+ * LEASE_ASSIGNMENT_CALL_THREAD_POOL and the scan parallelism factor are handed to the refresher.
+ */
+ private CompletableFuture, List>> loadLeaseListAsync() {
+ return CompletableFuture.supplyAsync(() -> loadWithRetry(() -> leaseRefresher.listLeasesParallely(
+ LEASE_ASSIGNMENT_CALL_THREAD_POOL, DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR)));
+ }
+
+ /**
+ * Invokes loadFunction and, if it throws, retries it immediately up to DDB_LOAD_RETRY_ATTEMPT more times.
+ *
+ * @param loadFunction the load operation to execute
+ * @return the value returned by loadFunction
+ * @throws CompletionException wrapping the last exception once the retries are exhausted
+ */
+ private T loadWithRetry(final Callable loadFunction) {
+ int retryAttempt = 0;
+ while (true) {
+ try {
+ return loadFunction.call();
+ } catch (final Exception e) {
+ if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) {
+ log.warn(
+ "Failed to load : {}, retrying",
+ loadFunction.getClass().getName(),
+ e);
+ retryAttempt++;
+ } else {
+ // Unchecked wrapper so the failure propagates out of the CompletableFuture supplier.
+ throw new CompletionException(e);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Computes the timestamp after which a graceful lease handoff is considered timed out.
+ *
+ * @return current nanoTimeProvider time in millis plus the graceful handoff timeout plus leaseDurationMillis
+ */
+ private long getCheckpointOwnerTimeoutTimestampMillis() {
+ // this is a future timestamp in millis that the graceful lease handoff shutdown can be considered
+ // expired. LeaseDurationMillis is used here to account for how long it might take for the
+ // lease owner to receive the shutdown signal before executing shutdown.
+ return getNanoTimeMillis()
+ + gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis()
+ + leaseDurationMillis;
+ }
+
+ /**
+ * Returns the current value of nanoTimeProvider converted from nanoseconds to milliseconds.
+ */
+ private long getNanoTimeMillis() {
+ // this is not a wall clock time. But if we stick with using this time provider for calculating the elapsed
+ // time it should be okay to use in checkpoint expiration calculation.
+ return TimeUnit.NANOSECONDS.toMillis(nanoTimeProvider.get());
+ }
+
+ /**
+ * Returns true when both leases have equal leaseOwner and equal checkpointOwner (null-safe comparison).
+ */
+ private static boolean isSameOwners(Lease currentLease, Lease previousLease) {
+ return Objects.equals(currentLease.leaseOwner(), previousLease.leaseOwner())
+ && Objects.equals(currentLease.checkpointOwner(), previousLease.checkpointOwner());
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/VarianceBasedLeaseAssignmentDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/VarianceBasedLeaseAssignmentDecider.java
new file mode 100644
index 000000000..97ae434a3
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/VarianceBasedLeaseAssignmentDecider.java
@@ -0,0 +1,363 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.coordinator.assignment;
+
+import java.util.AbstractMap.SimpleEntry;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.leases.Lease;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
+
+import static java.util.Objects.isNull;
+import static java.util.Objects.nonNull;
+
+/**
+ * VarianceBasedLeaseAssignmentDecider
+ * This implementation of LeaseAssignmentDecider performs lease assignment by considering the WorkerMetricStats
+ * values of workers with respect to the fleet level average of that WorkerMetricStats.
+ * Rebalanced leases are assigned to the workers that have the most capacity, in terms of throughput, left before
+ * reaching the fleet level average of the WorkerMetricStats value. In case of multiple WorkerMetricStats, the
+ * capacity to reach the fleet level average is determined by the outlier WorkerMetricStats.
+ * To minimize the variance, the algorithm picks the fleet level average of the WorkerMetricStats for workers as a
+ * pivot point and uses it to determine workers to take leases from and then assign to other workers.
+ * The threshold for considering a worker for re-balance is configurable via
+ * {@code reBalanceThreshold}. During reassignments the {@code dampeningPercentageValue} is used to achieve
+ * critical dampening.
+ */
+@Slf4j
+@KinesisClientInternalApi
+public final class VarianceBasedLeaseAssignmentDecider implements LeaseAssignmentDecider {
+    private final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView;
+    private final int dampeningPercentageValue;
+    private final int reBalanceThreshold;
+    private final boolean allowThroughputOvershoot;
+    /** WorkerMetricStats name to the average of that metric across all active workers. */
+    private final Map<String, Double> workerMetricsToFleetLevelAverageMap = new HashMap<>();
+    /** Workers that can still accept leases, ordered so that the worker farthest below average is polled first. */
+    private final PriorityQueue<WorkerMetricStats> assignableWorkerSortedByAvailableCapacity;
+    private int targetLeasePerWorker;
+
+    /**
+     * @param inMemoryStorageView view over the leases and worker metrics that assignments are computed against
+     * @param dampeningPercentageValue percentage (0-100) of the computed load difference that is actually moved
+     * @param reBalanceThreshold percentage around the fleet average outside of which a re-balance is triggered
+     * @param allowThroughputOvershoot whether a single smallest lease may be taken when no combination of leases
+     *                                 fits within the throughput to take
+     */
+    public VarianceBasedLeaseAssignmentDecider(
+            final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView,
+            final int dampeningPercentageValue,
+            final int reBalanceThreshold,
+            final boolean allowThroughputOvershoot) {
+        this.inMemoryStorageView = inMemoryStorageView;
+        this.dampeningPercentageValue = dampeningPercentageValue;
+        this.reBalanceThreshold = reBalanceThreshold;
+        this.allowThroughputOvershoot = allowThroughputOvershoot;
+        // Must run before the queue is populated: the comparator below reads the fleet level averages.
+        initialize();
+        final Comparator<WorkerMetricStats> comparator = Comparator.comparingDouble(
+                workerMetrics -> workerMetrics.computePercentageToReachAverage(workerMetricsToFleetLevelAverageMap));
+        this.assignableWorkerSortedByAvailableCapacity = new PriorityQueue<>(comparator.reversed());
+        this.assignableWorkerSortedByAvailableCapacity.addAll(
+                getAvailableWorkersForAssignment(inMemoryStorageView.getActiveWorkerMetrics()));
+    }
+
+    /**
+     * Computes the fleet level average of every WorkerMetricStats reported by the active workers and the target
+     * number of leases per worker (at least 1).
+     */
+    private void initialize() {
+        final Map<String, Double> workerMetricsNameToAverage = inMemoryStorageView.getActiveWorkerMetrics().stream()
+                .flatMap(workerMetrics -> workerMetrics.getMetricStats().keySet().stream()
+                        .map(workerMetricsName ->
+                                new SimpleEntry<>(workerMetricsName, workerMetrics.getMetricStat(workerMetricsName))))
+                .collect(Collectors.groupingBy(
+                        SimpleEntry::getKey, HashMap::new, Collectors.averagingDouble(SimpleEntry::getValue)));
+
+        workerMetricsToFleetLevelAverageMap.putAll(workerMetricsNameToAverage);
+
+        // Guard against division by zero when there are no active workers.
+        final int totalWorkers =
+                Math.max(inMemoryStorageView.getActiveWorkerMetrics().size(), 1);
+        this.targetLeasePerWorker = Math.max(inMemoryStorageView.getLeaseList().size() / totalWorkers, 1);
+    }
+
+    /**
+     * Tells whether the storage view reports the worker as below both its max throughput and its max lease count.
+     */
+    private boolean isWorkerAssignable(final String workerId) {
+        return inMemoryStorageView.isWorkerTotalThroughputLessThanMaxThroughput(workerId)
+                && inMemoryStorageView.isWorkerAssignedLeasesLessThanMaxLeases(workerId);
+    }
+
+    private List<WorkerMetricStats> getAvailableWorkersForAssignment(final List<WorkerMetricStats> workerMetricsList) {
+        // Workers with WorkerMetricStats running hot are also available for assignment as the goal is to always
+        // balance utilization (e.g., if all workers have hot WorkerMetricStats, balance the variance between them
+        // too).
+        return workerMetricsList.stream()
+                .filter(workerMetrics -> isWorkerAssignable(workerMetrics.getWorkerId()))
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * Assigns the given leases, oldest first, to the workers with the most available capacity. Leases that were
+     * handed to a worker are removed from {@code expiredOrUnAssignedLeases}; the list is also re-ordered in place.
+     *
+     * @param expiredOrUnAssignedLeases mutable list of leases to assign
+     */
+    @Override
+    public void assignExpiredOrUnassignedLeases(final List<Lease> expiredOrUnAssignedLeases) {
+        // Sort the expiredOrUnAssignedLeases using lastCounterIncrementNanos such that leases expired first are
+        // picked first.
+        // Unassigned leases have lastCounterIncrementNanos as zero and thus assigned first.
+        expiredOrUnAssignedLeases.sort(Comparator.comparing(Lease::lastCounterIncrementNanos));
+        final Set<Lease> assignedLeases = new HashSet<>();
+        for (final Lease lease : expiredOrUnAssignedLeases) {
+            final WorkerMetricStats workerToAssignLease = assignableWorkerSortedByAvailableCapacity.poll();
+            if (isNull(workerToAssignLease)) {
+                log.info("No worker available to assign lease {}", lease.leaseKey());
+                break;
+            }
+            assignLease(lease, workerToAssignLease);
+            assignedLeases.add(lease);
+        }
+        expiredOrUnAssignedLeases.removeAll(assignedLeases);
+    }
+
+    /**
+     * Determines, for a single WorkerMetricStats, which workers leases should be taken from.
+     *
+     * @param currentWorkerMetrics workers that report {@code workerMetricsName}
+     * @param workerMetricsName name of the WorkerMetricStats being evaluated
+     * @param workerMetricsValueAvg fleet level average of that WorkerMetricStats
+     * @return workers to take leases from, or an empty list when no re-balance is required
+     */
+    private List<WorkerMetricStats> getWorkersToTakeLeasesFromIfRequired(
+            final List<WorkerMetricStats> currentWorkerMetrics,
+            final String workerMetricsName,
+            final double workerMetricsValueAvg) {
+        final List<WorkerMetricStats> workerIdsAboveAverage = new ArrayList<>();
+
+        final double upperLimit = workerMetricsValueAvg * (1.0D + (double) reBalanceThreshold / 100);
+        final double lowerLimit = workerMetricsValueAvg * (1.0D - (double) reBalanceThreshold / 100);
+
+        WorkerMetricStats mostLoadedWorker = null;
+
+        log.info("Range for re-balance upper threshold {} and lower threshold {}", upperLimit, lowerLimit);
+
+        boolean shouldTriggerReBalance = false;
+        for (final WorkerMetricStats workerMetrics : currentWorkerMetrics) {
+            final double currentWorkerMetricsValue = workerMetrics.getMetricStat(workerMetricsName);
+            final boolean isCurrentWorkerMetricsAboveOperatingRange =
+                    workerMetrics.isWorkerMetricAboveOperatingRange(workerMetricsName);
+            /*
+            If there is any worker whose WorkerMetricStats value is outside +/- reBalanceThreshold % of
+            workerMetricsValueAvg, or whose WorkerMetricStats value is above operating range, trigger re-balance
+             */
+            if (currentWorkerMetricsValue > upperLimit
+                    || currentWorkerMetricsValue < lowerLimit
+                    || isCurrentWorkerMetricsAboveOperatingRange) {
+                shouldTriggerReBalance = true;
+            }
+            // Perform re-balance on the worker if its above upperLimit or if current WorkerMetricStats is above
+            // operating range.
+            if (currentWorkerMetricsValue >= upperLimit || isCurrentWorkerMetricsAboveOperatingRange) {
+                workerIdsAboveAverage.add(workerMetrics);
+            }
+            if (mostLoadedWorker == null
+                    || mostLoadedWorker.getMetricStat(workerMetricsName) < currentWorkerMetricsValue) {
+                mostLoadedWorker = workerMetrics;
+            }
+        }
+
+        /*
+        If workerIdsAboveAverage is empty that means there is no worker with WorkerMetricStats value above
+        upperLimit, so pick the worker with the highest value of this WorkerMetricStats. This can happen when there
+        is a worker with WorkerMetricStats value below lowerLimit but all other workers are within upperLimit.
+        mostLoadedWorker is null only when there are no workers at all; never add a null entry in that case.
+         */
+        if (workerIdsAboveAverage.isEmpty() && nonNull(mostLoadedWorker)) {
+            workerIdsAboveAverage.add(mostLoadedWorker);
+        }
+
+        return shouldTriggerReBalance ? workerIdsAboveAverage : Collections.emptyList();
+    }
+
+    /**
+     * Performs the balancing of the throughput assigned to workers based on the WorkerMetricStats values of the
+     * workers with respect to the fleet level average.
+     * Each WorkerMetricStats is treated independently to determine the workers to re-balance (computed based on
+     * reBalanceThreshold).
+     * The magnitude of throughput to take is determined by how far the worker is from the fleet average of that
+     * WorkerMetricStats and, in case of multiple WorkerMetricStats, the one with the maximum total throughput to
+     * take is used.
+     */
+    @Override
+    public void balanceWorkerVariance() {
+        final List<WorkerMetricStats> activeWorkerMetrics = inMemoryStorageView.getActiveWorkerMetrics();
+
+        log.info("WorkerMetricStats to corresponding fleet level average : {}", workerMetricsToFleetLevelAverageMap);
+        log.info("Active WorkerMetricStats : {}", activeWorkerMetrics);
+
+        final Map<String, Double> workerIdToThroughputToTakeMap = new HashMap<>();
+        String largestOutlierWorkerMetricsName = "";
+        double maxThroughputTake = -1.0D;
+
+        for (final Map.Entry<String, Double> workerMetricsToFleetLevelAverageEntry :
+                workerMetricsToFleetLevelAverageMap.entrySet()) {
+            final String workerMetricsName = workerMetricsToFleetLevelAverageEntry.getKey();
+
+            // Filter out workers that do not have the current WorkerMetricStats. This is possible if the
+            // application is adding a new WorkerMetricStats and is currently in the deployment phase.
+            final List<WorkerMetricStats> currentWorkerMetrics = activeWorkerMetrics.stream()
+                    .filter(workerMetrics -> workerMetrics.containsMetricStat(workerMetricsName))
+                    .collect(Collectors.toList());
+
+            final double fleetAverageForWorkerMetrics = workerMetricsToFleetLevelAverageEntry.getValue();
+
+            final List<WorkerMetricStats> workerToTakeLeasesFrom = getWorkersToTakeLeasesFromIfRequired(
+                    currentWorkerMetrics, workerMetricsName, fleetAverageForWorkerMetrics);
+
+            final Map<String, Double> workerIdToThroughputToTakeForCurrentWorkerMetrics = new HashMap<>();
+            double totalThroughputToTakeForCurrentWorkerMetrics = 0D;
+            for (final WorkerMetricStats workerToTakeLease : workerToTakeLeasesFrom) {
+                final double workerMetricsValueForWorker = workerToTakeLease.getMetricStat(workerMetricsName);
+                // Load to take based on the difference compared to the fleet level average
+                final double loadPercentageToTake =
+                        (workerMetricsValueForWorker - fleetAverageForWorkerMetrics) / workerMetricsValueForWorker;
+                // Dampen the load based on dampeningPercentageValue
+                final double dampenedLoadPercentageToTake =
+                        loadPercentageToTake * ((double) dampeningPercentageValue / 100);
+                final double throughputToTake =
+                        inMemoryStorageView.getTotalAssignedThroughput(workerToTakeLease.getWorkerId())
+                                * dampenedLoadPercentageToTake;
+                log.info(
+                        "For worker : {} taking throughput : {} after dampening based on WorkerMetricStats : {}",
+                        workerToTakeLease.getWorkerId(),
+                        throughputToTake,
+                        workerMetricsName);
+                totalThroughputToTakeForCurrentWorkerMetrics += throughputToTake;
+                workerIdToThroughputToTakeForCurrentWorkerMetrics.put(
+                        workerToTakeLease.getWorkerId(), throughputToTake);
+            }
+
+            /*
+            If totalThroughputToTakeForCurrentWorkerMetrics is more than maxThroughputTake that means this
+            WorkerMetricStats is the larger outlier, so consider it for reBalancing
+             */
+            if (maxThroughputTake < totalThroughputToTakeForCurrentWorkerMetrics) {
+                largestOutlierWorkerMetricsName = workerMetricsName;
+                workerIdToThroughputToTakeMap.clear();
+                workerIdToThroughputToTakeMap.putAll(workerIdToThroughputToTakeForCurrentWorkerMetrics);
+                maxThroughputTake = totalThroughputToTakeForCurrentWorkerMetrics;
+            }
+        }
+
+        log.info(
+                "Largest outlier WorkerMetricStats is : {} and total of {} throughput will be rebalanced",
+                largestOutlierWorkerMetricsName,
+                maxThroughputTake);
+        log.info("Workers to throughput taken from them is : {}", workerIdToThroughputToTakeMap);
+
+        final List<Map.Entry<String, Double>> sortedWorkerIdToThroughputToTakeEntries =
+                new ArrayList<>(workerIdToThroughputToTakeMap.entrySet());
+        // Sort entries by value, largest throughput to take first.
+        sortedWorkerIdToThroughputToTakeEntries.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue()));
+
+        for (final Map.Entry<String, Double> workerIdToThroughputToTakeEntry :
+                sortedWorkerIdToThroughputToTakeEntries) {
+            final String workerId = workerIdToThroughputToTakeEntry.getKey();
+
+            final double throughputToTake = workerIdToThroughputToTakeEntry.getValue();
+
+            final Queue<Lease> leasesToTake = getLeasesToTake(workerId, throughputToTake);
+
+            log.info(
+                    "Leases taken from worker : {} are : {}",
+                    workerId,
+                    leasesToTake.stream().map(Lease::leaseKey).collect(Collectors.toSet()));
+
+            for (final Lease lease : leasesToTake) {
+                final WorkerMetricStats workerToAssign = assignableWorkerSortedByAvailableCapacity.poll();
+                if (isNull(workerToAssign)) {
+                    // The queue is empty and only assignLease() refills it, so nothing else can be assigned here.
+                    break;
+                }
+                // NOTE(review): the polled worker is not put back on the queue when we stop here, so it is not
+                // considered for the remaining leases of this run - confirm this is intended.
+                if (workerToAssign.willAnyMetricStatsGoAboveAverageUtilizationOrOperatingRange(
+                        workerMetricsToFleetLevelAverageMap,
+                        inMemoryStorageView.getTargetAverageThroughput(),
+                        lease.throughputKBps(),
+                        targetLeasePerWorker)) {
+                    log.info("No worker to assign anymore in this iteration due to hitting average values");
+                    break;
+                }
+                assignLease(lease, workerToAssign);
+            }
+        }
+
+        printWorkerToUtilizationLog(inMemoryStorageView.getActiveWorkerMetrics());
+    }
+
+    /**
+     * Picks the leases to take away from {@code workerId} so that roughly {@code throughputToTake} is moved.
+     *
+     * @return leases to take; empty when the worker has no leases
+     */
+    private Queue<Lease> getLeasesToTake(final String workerId, final double throughputToTake) {
+        final Set<Lease> existingLeases =
+                inMemoryStorageView.getWorkerToLeasesMap().get(workerId);
+
+        if (isNull(existingLeases) || existingLeases.isEmpty()) {
+            return new ArrayDeque<>();
+        }
+
+        if (inMemoryStorageView.getTotalAssignedThroughput(workerId) == 0D) {
+            // This is the case where throughput of this worker is zero and it has 1 or more leases assigned.
+            // It is not possible to determine leases to take based on throughput so simply take 1 lease and move on.
+            return new ArrayDeque<>(new ArrayList<>(existingLeases).subList(0, 1));
+        }
+
+        return getLeasesCombiningToThroughput(workerId, throughputToTake);
+    }
+
+    /**
+     * Assigns {@code lease} to the given worker in the in-memory view and, if the worker can still accept leases
+     * afterwards, puts it back on the queue of assignable workers.
+     */
+    private void assignLease(final Lease lease, final WorkerMetricStats workerMetrics) {
+        if (nonNull(lease.actualOwner()) && lease.actualOwner().equals(workerMetrics.getWorkerId())) {
+            // if the new owner and the current owner are the same then there is no assignment to do;
+            // put the worker back as no assignment was done
+            assignableWorkerSortedByAvailableCapacity.add(workerMetrics);
+            return;
+        }
+        workerMetrics.extrapolateMetricStatValuesForAddedThroughput(
+                workerMetricsToFleetLevelAverageMap,
+                inMemoryStorageView.getTargetAverageThroughput(),
+                lease.throughputKBps(),
+                targetLeasePerWorker);
+        log.info("Assigning lease : {} to worker : {}", lease.leaseKey(), workerMetrics.getWorkerId());
+        inMemoryStorageView.performLeaseAssignment(lease, workerMetrics.getWorkerId());
+        if (isWorkerAssignable(workerMetrics.getWorkerId())) {
+            assignableWorkerSortedByAvailableCapacity.add(workerMetrics);
+        }
+    }
+
+    private void printWorkerToUtilizationLog(final List<WorkerMetricStats> activeWorkerMetrics) {
+        activeWorkerMetrics.forEach(workerMetrics -> log.info(
+                "WorkerId : {} and average WorkerMetricStats data : {}",
+                workerMetrics.getWorkerId(),
+                workerMetrics.getMetricStatsMap()));
+    }
+
+    /**
+     * Randomly selects leases of {@code workerId} whose combined throughput stays strictly below
+     * {@code throughputToGet}. When nothing fits and {@code allowThroughputOvershoot} is set, the single lease
+     * with the smallest throughput is returned instead.
+     */
+    private Queue<Lease> getLeasesCombiningToThroughput(final String workerId, final double throughputToGet) {
+        final List<Lease> assignedLeases =
+                new ArrayList<>(inMemoryStorageView.getWorkerToLeasesMap().get(workerId));
+        if (assignedLeases.isEmpty()) {
+            // This is possible if the worker is having high utilization but does not have any leases assigned to it
+            return new ArrayDeque<>();
+        }
+        // Shuffle leases to randomize which leases get picked.
+        Collections.shuffle(assignedLeases);
+        final Queue<Lease> response = new ArrayDeque<>();
+        double remainingThroughputToGet = throughputToGet;
+        for (final Lease lease : assignedLeases) {
+            // if adding this lease makes the throughput to take reach or go below zero, avoid taking this lease.
+            if (remainingThroughputToGet - lease.throughputKBps() <= 0) {
+                continue;
+            }
+            remainingThroughputToGet -= lease.throughputKBps();
+            response.add(lease);
+        }
+
+        // If allowThroughputOvershoot is set to true, take a minimum throughput lease
+        if (allowThroughputOvershoot && response.isEmpty()) {
+            assignedLeases.stream()
+                    .min(Comparator.comparingDouble(Lease::throughputKBps))
+                    .ifPresent(response::add);
+        }
+        return response;
+    }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersion.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersion.java
new file mode 100644
index 000000000..ccbd90858
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersion.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+/**
+ * Client versions supported during the upgrade from KCLv2.x to KCLv3.x.
+ *
+ * This enum is persisted in storage, so any change to it needs to be backward compatible.
+ * Reordering the values is not backward compatible; also, if a version is removed, the corresponding
+ * enum value cannot be reused without backward compatibility considerations.
+ */
+public enum ClientVersion {
+ /**
+ * This is a transient start state version used during initialization of the Migration State Machine.
+ */
+ CLIENT_VERSION_INIT,
+ /**
+ * This version is used during the upgrade of an application from KCLv2.x to KCLv3.x. In this version,
+ * KCL workers will emit WorkerMetricStats and run KCLv2.x algorithms for leader election and lease
+ * assignment. KCL will also monitor the worker fleet for readiness to upgrade to KCLv3.x.
+ */
+ CLIENT_VERSION_UPGRADE_FROM_2X,
+ /**
+ * This version is used during rollback from CLIENT_VERSION_UPGRADE_FROM_2X or CLIENT_VERSION_3X_WITH_ROLLBACK,
+ * which can only be initiated using a KCL migration tool, when the customer wants to revert to KCLv2.x
+ * functionality. In this version, KCL will not emit WorkerMetricStats and will run KCLv2.x algorithms for leader
+ * election and lease assignment. In this version, KCL will monitor for the roll-forward scenario where the
+ * client version is updated to CLIENT_VERSION_UPGRADE_FROM_2X using the migration tool.
+ */
+ CLIENT_VERSION_2X,
+ /**
+ * When workers are operating in CLIENT_VERSION_UPGRADE_FROM_2X and the worker fleet is determined to be
+ * KCLv3.x ready (when the lease table GSI is active and worker-metrics are being emitted by all lease owners),
+ * the leader will initiate the switch to KCLv3.x algorithms for leader election and lease assignment
+ * by using this version and persisting it in the {@link MigrationState}, which allows all worker hosts
+ * to also flip to KCLv3.x functionality. In this version, KCL will also monitor for rollback to detect when the
+ * customer updates the version to CLIENT_VERSION_2X using the migration tool, so that it instantly flips back
+ * to CLIENT_VERSION_2X.
+ */
+ CLIENT_VERSION_3X_WITH_ROLLBACK,
+ /**
+ * A new application starting on KCLv3.x, or an application upgraded from KCLv2.x after the upgrade is successful,
+ * can use this version to default to all KCLv3.x algorithms without any monitor for rollback.
+ */
+ CLIENT_VERSION_3X;
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersionChangeMonitor.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersionChangeMonitor.java
new file mode 100644
index 000000000..41617eddf
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersionChangeMonitor.java
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import java.time.Duration;
+import java.util.Random;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.TimeUnit;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+import software.amazon.kinesis.metrics.MetricsFactory;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+
+import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY;
+import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
+
+/**
+ * Change monitor for MigrationState.clientVersion to notify a callback if the value
+ * changes from a given value. This monitor will be run to monitor
+ * rollback, roll-forward and also upgrade to 3.x scenarios. Look at {@link ClientVersion}
+ * for more details.
+ *
+ * Since all KCL workers will be running the monitor, the monitor poll interval uses
+ * a random jitter to stagger the reads to ddb.
+ *
+ * The class is thread-safe and will invoke callback on a separate thread.
+ */
+@Slf4j
+@RequiredArgsConstructor
+@ThreadSafe
+@KinesisClientInternalApi
+public class ClientVersionChangeMonitor implements Runnable {
+
+    /**
+     * Interface of a callback to invoke when monitor condition is true.
+     */
+    public interface ClientVersionChangeCallback {
+        void accept(final MigrationState currentMigrationState) throws InvalidStateException, DependencyException;
+    }
+
+    private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis();
+    private static final double JITTER_FACTOR = 0.5;
+
+    private final MetricsFactory metricsFactory;
+    private final CoordinatorStateDAO coordinatorStateDAO;
+    private final ScheduledExecutorService stateMachineThreadPool;
+    private final ClientVersionChangeCallback callback;
+    private final ClientVersion expectedVersion;
+    private final Random random;
+
+    /** Handle of the scheduled poll; {@code null} means the monitor is not running (never started or stopped). */
+    private ScheduledFuture<?> scheduledFuture;
+
+    /**
+     * Schedules the periodic poll on {@code stateMachineThreadPool} if the monitor is not already running.
+     * The first poll is delayed by one interval plus a random jitter of up to
+     * {@code JITTER_FACTOR} of the interval.
+     */
+    public synchronized void startMonitor() {
+        if (scheduledFuture == null) {
+            final long jitter = (long) (random.nextDouble() * MONITOR_INTERVAL_MILLIS * JITTER_FACTOR);
+            log.info(
+                    "Monitoring for MigrationState client version change from {} every {}ms with initial delay of {}ms",
+                    expectedVersion,
+                    MONITOR_INTERVAL_MILLIS,
+                    MONITOR_INTERVAL_MILLIS + jitter);
+            scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay(
+                    this, MONITOR_INTERVAL_MILLIS + jitter, MONITOR_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
+        }
+    }
+
+    @Override
+    public String toString() {
+        return getClass().getSimpleName() + "[" + expectedVersion + "]";
+    }
+
+    /**
+     * Cancel the monitor explicitly before the condition is met, e.g. when the worker is going down.
+     * Note on synchronization: the callback of this monitor is invoked while holding the lock on this monitor
+     * object. If cancel is called from within the same lock context that the callback uses, then it can lead to
+     * deadlock. Ensure the synchronization context is not shared between the callback and the caller of cancel.
+     */
+    public synchronized void cancel() {
+        if (scheduledFuture != null) {
+            log.info("Cancelling {}", this);
+            scheduledFuture.cancel(false);
+            // cancel(false) does not stop a run() that has already started and is waiting for this lock;
+            // clearing the handle lets that run() detect the cancellation and return without polling.
+            scheduledFuture = null;
+        } else {
+            log.info("Monitor {} is not running", this);
+        }
+    }
+
+    /**
+     * Single poll: reads the MigrationState and, if its client version differs from {@code expectedVersion},
+     * invokes the callback and stops the monitor. Any exception is logged and the poll is retried on the next
+     * scheduled run.
+     */
+    @Override
+    public synchronized void run() {
+        try {
+            if (scheduledFuture == null) {
+                log.debug("Monitor has been cancelled, not running...");
+                return;
+            }
+            emitMetrics();
+            final MigrationState migrationState =
+                    (MigrationState) coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY);
+            if (migrationState != null) {
+                if (migrationState.getClientVersion() != expectedVersion) {
+                    log.info("MigrationState client version has changed {}, invoking monitor callback", migrationState);
+                    callback.accept(migrationState);
+                    log.info("Callback successful, monitoring cancelling itself.");
+                    // stop further monitoring; the handle may already have been cleared if the callback
+                    // called cancel() re-entrantly on this thread.
+                    if (scheduledFuture != null) {
+                        scheduledFuture.cancel(false);
+                        scheduledFuture = null;
+                    }
+                } else {
+                    log.debug("No change detected {}", this);
+                }
+            }
+        } catch (final Exception e) {
+            // Swallowing is deliberate: an exception escaping run() would suppress all future executions.
+            log.warn(
+                    "Exception occurred when monitoring for client version change from {}, will retry in {}",
+                    expectedVersion,
+                    MONITOR_INTERVAL_MILLIS,
+                    e);
+        }
+    }
+
+    /**
+     * Emits a count metric describing which mode this worker is currently running in.
+     *
+     * @throws IllegalStateException if {@code expectedVersion} is not a version this monitor can run for
+     */
+    private void emitMetrics() {
+        final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
+        try {
+            switch (expectedVersion) {
+                case CLIENT_VERSION_3X_WITH_ROLLBACK:
+                    scope.addData("CurrentState:3xWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+                    break;
+                case CLIENT_VERSION_2X:
+                case CLIENT_VERSION_UPGRADE_FROM_2X:
+                    scope.addData("CurrentState:2xCompatibleWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+                    break;
+                default:
+                    throw new IllegalStateException(String.format("Unexpected version %s", expectedVersion.name()));
+            }
+        } finally {
+            MetricsUtil.endScope(scope);
+        }
+    }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion2xState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion2xState.java
new file mode 100644
index 000000000..45d29a413
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion2xState.java
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import java.util.Random;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ScheduledExecutorService;
+
+import lombok.NonNull;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
+import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
+import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
+import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
+
+/**
+ * State for CLIENT_VERSION_2X. In this state, the only allowed valid transition is
+ * the roll-forward scenario which can only be performed using the KCL Migration tool.
+ * So when the state machine enters this state, a monitor is started to detect the
+ * roll-forward scenario.
+ */
+@KinesisClientInternalApi
+@RequiredArgsConstructor
+@Slf4j
+@ThreadSafe
+public class MigrationClientVersion2xState implements MigrationClientVersionState {
+    private final MigrationStateMachine stateMachine;
+    private final CoordinatorStateDAO coordinatorStateDAO;
+    private final ScheduledExecutorService stateMachineThreadPool;
+    private final DynamicMigrationComponentsInitializer initializer;
+    private final Random random;
+
+    private ClientVersionChangeMonitor rollForwardMonitor;
+    /** Set once {@link #enter(ClientVersion)} has run; never reset. */
+    private boolean entered = false;
+    /** Set once {@link #leave()} has run; a state that has been left is inactive and cannot be re-entered. */
+    private boolean left = false;
+
+    @Override
+    public ClientVersion clientVersion() {
+        return CLIENT_VERSION_2X;
+    }
+
+    /**
+     * Initializes the 2.x components and starts the roll-forward monitor. Has no effect if the state was
+     * already entered (whether or not it has since been left).
+     *
+     * @param fromClientVersion the client version the state machine is transitioning from
+     */
+    @Override
+    public synchronized void enter(final ClientVersion fromClientVersion) {
+        if (!entered) {
+            log.info("Entering {} from {}", this, fromClientVersion);
+            initializer.initializeClientVersionFor2x(fromClientVersion);
+
+            log.info("Starting roll-forward monitor");
+            rollForwardMonitor = new ClientVersionChangeMonitor(
+                    initializer.metricsFactory(),
+                    coordinatorStateDAO,
+                    stateMachineThreadPool,
+                    this::onClientVersionChange,
+                    clientVersion(),
+                    random);
+            rollForwardMonitor.startMonitor();
+            entered = true;
+        } else {
+            log.info("Not entering {}", left ? "already exited state" : "already entered state");
+        }
+    }
+
+    /**
+     * Stops the roll-forward monitor and marks this state as left. Has no effect if the state is not active.
+     */
+    @Override
+    public synchronized void leave() {
+        if (entered && !left) {
+            log.info("Leaving {}", this);
+            cancelRollForwardMonitor();
+            // Mark the state as exited so that repeated leave() calls and late monitor callbacks are ignored.
+            left = true;
+        } else {
+            log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
+        }
+    }
+
+    @Override
+    public String toString() {
+        return getClass().getSimpleName();
+    }
+
+    /**
+     * Callback handler to handle client version changes in MigrationState in DDB.
+     * @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_2X
+     * @throws InvalidStateException during transition to the next state based on the new ClientVersion
+     *                               or if the new state in DDB is unexpected.
+     * @throws DependencyException propagated from the state machine transition
+     */
+    private synchronized void onClientVersionChange(@NonNull final MigrationState newState)
+            throws InvalidStateException, DependencyException {
+        if (!entered || left) {
+            log.warn("Received client version change notification on inactive state {}", this);
+            return;
+        }
+        final MetricsScope scope =
+                MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
+        try {
+            if (newState.getClientVersion() == CLIENT_VERSION_UPGRADE_FROM_2X) {
+                log.info(
+                        "A roll-forward has been initiated for the application. Transition to {}",
+                        CLIENT_VERSION_UPGRADE_FROM_2X);
+                // If this succeeds, the monitor will cancel itself.
+                stateMachine.transitionTo(CLIENT_VERSION_UPGRADE_FROM_2X, newState);
+            } else {
+                // This should not happen, so throw an exception that allows the monitor to continue monitoring
+                // changes, this allows KCL to operate in the current state and keep monitoring until a valid
+                // state transition is possible.
+                // However, there could be a split brain here, new workers will use DDB value as source of truth,
+                // so we could also write back CLIENT_VERSION_2X to DDB to ensure all workers have consistent
+                // behavior.
+                // Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
+                // so keeping it simple and not writing back to DDB, the error log below would help capture
+                // any strange behavior if this happens.
+                log.error(
+                        "Migration state has invalid client version {}. Transition from {} is not supported",
+                        newState,
+                        CLIENT_VERSION_2X);
+                throw new InvalidStateException(String.format("Unexpected new state %s", newState));
+            }
+        } catch (final InvalidStateException | DependencyException e) {
+            scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+            throw e;
+        } finally {
+            MetricsUtil.endScope(scope);
+        }
+    }
+
+    /**
+     * Cancels the roll-forward monitor, if any, on another thread. The cancel is asynchronous because the
+     * monitor invokes its callback while holding its own lock, and cancel needs that same lock.
+     */
+    private void cancelRollForwardMonitor() {
+        if (rollForwardMonitor != null) {
+            final ClientVersionChangeMonitor localRollForwardMonitor = rollForwardMonitor;
+            CompletableFuture.runAsync(() -> {
+                log.info("Cancelling roll-forward monitor");
+                localRollForwardMonitor.cancel();
+            });
+            rollForwardMonitor = null;
+        }
+    }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xState.java
new file mode 100644
index 000000000..1e8573111
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xState.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+
+/**
+ * State for CLIENT_VERSION_3X which enables KCL to run 3.x algorithms on a new KCL v3.x application
+ * or on an application that has successfully upgraded from v2.x. This is a terminal state of the
+ * state machine and no rollbacks are supported in this state.
+ *
+ * <p>{@code entered} and {@code left} are only read and written while holding this object's
+ * monitor, which is why both {@link #enter(ClientVersion)} and {@link #leave()} are synchronized.
+ */
+@KinesisClientInternalApi
+@RequiredArgsConstructor
+@Slf4j
+@ThreadSafe
+public class MigrationClientVersion3xState implements MigrationClientVersionState {
+    private final MigrationStateMachine stateMachine;
+    private final DynamicMigrationComponentsInitializer initializer;
+    private boolean entered = false;
+    private boolean left = false;
+
+    /**
+     * @return the client version this state implements, always CLIENT_VERSION_3X.
+     */
+    @Override
+    public ClientVersion clientVersion() {
+        return ClientVersion.CLIENT_VERSION_3X;
+    }
+
+    /**
+     * Initializes the 3.x components through the initializer the first time the state is entered.
+     * Further calls while the state is marked as entered only log and return.
+     *
+     * @param fromClientVersion client version of the state being transitioned from
+     * @throws DependencyException propagated from the 3.x component initialization
+     */
+    @Override
+    public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
+        if (!entered) {
+            log.info("Entering {} from {}", this, fromClientVersion);
+            initializer.initializeClientVersionFor3x(fromClientVersion);
+            entered = true;
+        } else {
+            log.info("Not entering {}", left ? "already exited state" : "already entered state");
+        }
+    }
+
+    /**
+     * Marks the state as exited. Synchronized so that the flag updates are atomic with respect
+     * to {@link #enter(ClientVersion)}.
+     */
+    @Override
+    public synchronized void leave() {
+        if (entered && !left) {
+            log.info("Leaving {}", this);
+            entered = false;
+            left = true;
+        } else {
+            // "left" tells the two failure reasons apart; "entered" is already false after leaving.
+            log.info("Cannot leave {}", left ? "already exited state" : "because state is not active");
+        }
+    }
+
+    /**
+     * @return the simple class name, used when this state is logged.
+     */
+    @Override
+    public String toString() {
+        return getClass().getSimpleName();
+    }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xWithRollbackState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xWithRollbackState.java
new file mode 100644
index 000000000..6235c5a93
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xWithRollbackState.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import java.util.Random;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ScheduledExecutorService;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
+import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X;
+import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
+import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
+
+/**
+ * State for CLIENT_VERSION_3X_WITH_ROLLBACK which enables KCL to run its 3.x compliant algorithms
+ * during the upgrade process after all KCL workers in the fleet are 3.x compliant. Since this
+ * is an instant switch from CLIENT_VERSION_UPGRADE_FROM_2X, it also supports rollback if customers
+ * see regression to allow for instant rollbacks as well. This would be achieved by customers
+ * running a KCL migration tool to update MigrationState in DDB. So this state monitors for
+ * rollback triggers and performs state transitions accordingly.
+ *
+ * <p>{@code entered}, {@code left} and {@code rollbackMonitor} are only accessed while holding
+ * this object's monitor.
+ */
+@Slf4j
+@KinesisClientInternalApi
+@RequiredArgsConstructor
+@ThreadSafe
+public class MigrationClientVersion3xWithRollbackState implements MigrationClientVersionState {
+
+    private final MigrationStateMachine stateMachine;
+    private final CoordinatorStateDAO coordinatorStateDAO;
+    private final ScheduledExecutorService stateMachineThreadPool;
+    private final DynamicMigrationComponentsInitializer initializer;
+    private final Random random;
+
+    private ClientVersionChangeMonitor rollbackMonitor;
+    private boolean entered;
+    private boolean left;
+
+    /**
+     * @return the client version this state implements, always CLIENT_VERSION_3X_WITH_ROLLBACK.
+     */
+    @Override
+    public ClientVersion clientVersion() {
+        return ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
+    }
+
+    /**
+     * Initializes the 3.x-with-rollback components and starts a monitor that reports client
+     * version changes to {@link #onClientVersionChange(MigrationState)}. Further calls while
+     * the state is marked as entered only log and return.
+     *
+     * @param fromClientVersion client version of the state being transitioned from
+     * @throws DependencyException propagated from the component initialization
+     */
+    @Override
+    public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
+        if (!entered) {
+            log.info("Entering {} from {}", this, fromClientVersion);
+            initializer.initializeClientVersionFor3xWithRollback(fromClientVersion);
+            // we need to run the rollback monitor
+            log.info("Starting rollback monitor");
+            rollbackMonitor = new ClientVersionChangeMonitor(
+                    initializer.metricsFactory(),
+                    coordinatorStateDAO,
+                    stateMachineThreadPool,
+                    this::onClientVersionChange,
+                    clientVersion(),
+                    random);
+            rollbackMonitor.startMonitor();
+            entered = true;
+        } else {
+            log.info("Not entering {}", left ? "already exited state" : "already entered state");
+        }
+    }
+
+    /**
+     * Cancels the rollback monitor and marks the state as exited. Synchronized because it
+     * mutates the same fields that {@link #enter(ClientVersion)} and the monitor callback use.
+     */
+    @Override
+    public synchronized void leave() {
+        if (entered && !left) {
+            log.info("Leaving {}", this);
+            cancelRollbackMonitor();
+            entered = false;
+            left = true;
+        } else {
+            // "left" tells the two failure reasons apart; "entered" is already false after leaving.
+            log.info("Cannot leave {}", left ? "already exited state" : "because state is not active");
+        }
+    }
+
+    /**
+     * @return the simple class name, so log lines read the same as for the other states.
+     */
+    @Override
+    public String toString() {
+        return getClass().getSimpleName();
+    }
+
+    /**
+     * Callback for client version changes observed in the MigrationState in DDB.
+     *
+     * @param newState MigrationState reported by the monitor
+     * @throws InvalidStateException if the new client version is neither CLIENT_VERSION_2X nor
+     *                               CLIENT_VERSION_3X, or propagated from the state transition
+     * @throws DependencyException propagated from the state transition
+     */
+    private synchronized void onClientVersionChange(final MigrationState newState)
+            throws InvalidStateException, DependencyException {
+        if (!entered || left) {
+            log.warn("Received client version change notification on inactive state {}", this);
+            return;
+        }
+        final MetricsScope scope =
+                MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
+        try {
+            switch (newState.getClientVersion()) {
+                case CLIENT_VERSION_2X:
+                    log.info("A rollback has been initiated for the application. Transition to {}", CLIENT_VERSION_2X);
+                    stateMachine.transitionTo(ClientVersion.CLIENT_VERSION_2X, newState);
+                    break;
+                case CLIENT_VERSION_3X:
+                    log.info("Customer has switched to 3.x after successful upgrade, state machine will move to a "
+                            + "terminal state and stop monitoring. Rollbacks will no longer be supported anymore");
+                    stateMachine.transitionTo(CLIENT_VERSION_3X, newState);
+                    // This worker will still be running the migrationAdaptive components in 3.x mode which will
+                    // no longer dynamically switch back to 2.x mode, however to directly run 3.x component without
+                    // adaption to migration (i.e. move to CLIENT_VERSION_3X state), it requires this worker to go
+                    // through the current deployment which initiated the switch to 3.x mode.
+                    break;
+                default:
+                    // This should not happen, so throw an exception that allows the monitor to continue monitoring
+                    // changes, this allows KCL to operate in the current state and keep monitoring until a valid
+                    // state transition is possible.
+                    // However, there could be a split brain here, new workers will use DDB value as source of truth,
+                    // so we could also write back CLIENT_VERSION_3X_WITH_ROLLBACK to DDB to ensure all workers have
+                    // consistent behavior.
+                    // Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
+                    // so keeping it simple and not writing back to DDB, the error log below would help capture
+                    // any strange behavior if this happens.
+                    log.error("Migration state has invalid client version {}", newState);
+                    throw new InvalidStateException(String.format("Unexpected new state %s", newState));
+            }
+        } catch (final InvalidStateException | DependencyException e) {
+            scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+            throw e;
+        } finally {
+            MetricsUtil.endScope(scope);
+        }
+    }
+
+    /**
+     * Detaches the rollback monitor and cancels it on a background task. Does nothing when no
+     * monitor is attached.
+     */
+    private void cancelRollbackMonitor() {
+        if (rollbackMonitor != null) {
+            final ClientVersionChangeMonitor localRollbackMonitor = rollbackMonitor;
+            CompletableFuture.supplyAsync(() -> {
+                log.info("Cancelling rollback monitor");
+                localRollbackMonitor.cancel();
+                return null;
+            });
+            rollbackMonitor = null;
+        }
+    }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionState.java
new file mode 100644
index 000000000..c1d8507ed
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionState.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+
+/**
+ * Interface of a state implementation for the MigrationStateMachine. Each implementation
+ * corresponds to one {@link ClientVersion} and is entered and left as the state machine
+ * transitions between client versions.
+ */
+public interface MigrationClientVersionState {
+
+ /**
+ * The associated clientVersion this state corresponds to.
+ * @return ClientVersion that this state implements the logic for.
+ */
+ ClientVersion clientVersion();
+
+ /**
+ * Enter the state and perform the business logic of being in this state,
+ * which includes initializing KCL based on the ClientVersion and starting any
+ * monitoring that allows the next state transition.
+ * @param fromClientVersion client version of the previous state, in case a specific action must
+ * be taken based on the state from which this state
+ * is being entered.
+ * @throws DependencyException if DDB fails in unexpected ways for those states
+ * that create the GSI
+ */
+ void enter(ClientVersion fromClientVersion) throws DependencyException;
+
+ /**
+ * Invoked after the transition to another state has occurred
+ * to allow printing any helpful logs or performing cleanup.
+ */
+ void leave();
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionStateInitializer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionStateInitializer.java
new file mode 100644
index 000000000..970bd6ede
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionStateInitializer.java
@@ -0,0 +1,263 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import java.util.AbstractMap.SimpleEntry;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.Callable;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
+import software.amazon.kinesis.coordinator.CoordinatorState;
+import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
+
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X;
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
+import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY;
+
+/**
+ * Initializer to determine start state of the state machine which identifies the
+ * state to initialize KCL when it is starting up. The initial state is determined based on the
+ * customer configured {@link ClientVersionConfig} and the current {@link MigrationState} in DDB,
+ * as follows
+ * ClientVersionConfig | MigrationState (DDB) | initial client version
+ * --------------------+---------------------------------+--------------------------------
+ * COMPATIBLE_WITH_2X | Does not exist | CLIENT_VERSION_UPGRADE_FROM_2X
+ * 3X | Does not exist | CLIENT_VERSION_3X
+ * COMPATIBLE_WITH_2X | CLIENT_VERSION_3X_WITH_ROLLBACK | CLIENT_VERSION_3X_WITH_ROLLBACK
+ * 3X | CLIENT_VERSION_3X_WITH_ROLLBACK | CLIENT_VERSION_3X
+ * any | CLIENT_VERSION_2X | CLIENT_VERSION_2X
+ * any | CLIENT_VERSION_UPGRADE_FROM_2X | CLIENT_VERSION_UPGRADE_FROM_2X
+ * any | CLIENT_VERSION_3X | CLIENT_VERSION_3X
+ */
+@KinesisClientInternalApi
+@RequiredArgsConstructor
+@Slf4j
+@ThreadSafe
+public class MigrationClientVersionStateInitializer {
+ private static final int MAX_INITIALIZATION_RETRY = 10;
+ private static final long INITIALIZATION_RETRY_DELAY_MILLIS = 1000L;
+ /**
+ * A jitter factor of 10% to stagger the retries.
+ */
+ private static final double JITTER_FACTOR = 0.1;
+
+ private final Callable timeProvider;
+ private final CoordinatorStateDAO coordinatorStateDAO;
+ private final ClientVersionConfig clientVersionConfig;
+ private final Random random;
+ private final String workerIdentifier;
+
+ /**
+  * Determine the client version the state machine should start in. The current MigrationState is
+  * read from DDB and the initial client version is derived from it and the configured version. If
+  * the two differ, the new version is written back to DDB, retrying up to MAX_INITIALIZATION_RETRY
+  * times with a jittered delay when the write does not take effect.
+  *
+  * @return the initial client version paired with the MigrationState it was derived from or written as
+  * @throws DependencyException if the initial state could not be determined; when an
+  *         InvalidStateException caused the failure it is kept as the cause of the wrapped exception
+  */
+ public SimpleEntry<ClientVersion, MigrationState> getInitialState() throws DependencyException {
+     log.info("Initializing migration state machine starting state, configured version {}", clientVersionConfig);
+
+     // Stays null when the retries are simply exhausted.
+     Throwable failureCause = null;
+     try {
+         MigrationState migrationState = getMigrationStateFromDynamo();
+         int retryCount = 0;
+         while (retryCount++ < MAX_INITIALIZATION_RETRY) {
+             final ClientVersion initialClientVersion = getClientVersionForInitialization(migrationState);
+             if (migrationState.getClientVersion() != initialClientVersion) {
+                 // If update fails, the value represents current state in dynamo
+                 migrationState = updateMigrationStateInDynamo(migrationState, initialClientVersion);
+                 if (migrationState.getClientVersion() == initialClientVersion) {
+                     // update succeeded. Transition to the state
+                     return new SimpleEntry<>(initialClientVersion, migrationState);
+                 }
+                 final long delay = getInitializationRetryDelay();
+                 log.warn(
+                         "Failed to update migration state with {}, retry after delay {}",
+                         initialClientVersion,
+                         delay);
+                 safeSleep(delay);
+             } else {
+                 return new SimpleEntry<>(initialClientVersion, migrationState);
+             }
+         }
+     } catch (final InvalidStateException e) {
+         log.error("Unable to initialize state machine", e);
+         failureCause = e;
+     }
+     throw new DependencyException(new RuntimeException(
+             "Unable to determine initial state for migration state machine", failureCause));
+ }
+
+ /**
+  * Map the MigrationState read from DDB, together with the configured client version, to the
+  * client version KCL should initialize with.
+  *
+  * @param migrationState state read from DDB, or a CLIENT_VERSION_INIT placeholder when none exists
+  * @return the client version to initialize with
+  * @throws IllegalStateException if the state carries a client version this method does not handle
+  */
+ public ClientVersion getClientVersionForInitialization(final MigrationState migrationState) {
+     final ClientVersion persistedVersion = migrationState.getClientVersion();
+     switch (persistedVersion) {
+         case CLIENT_VERSION_INIT: {
+             // Nothing stored in DDB yet, so the configured version decides where to start.
+             final ClientVersion configuredStart = getNextClientVersionBasedOnConfigVersion();
+             log.info("Application is starting in {}", configuredStart);
+             return configuredStart;
+         }
+         case CLIENT_VERSION_3X_WITH_ROLLBACK:
+             if (clientVersionConfig != ClientVersionConfig.CLIENT_VERSION_CONFIG_3X) {
+                 log.info("Initialize with {}", CLIENT_VERSION_3X_WITH_ROLLBACK);
+                 return persistedVersion;
+             }
+             // The upgrade has completed and the configuration asks for 3.x, so move forward.
+             log.info("Application has successfully upgraded, transitioning to {}", CLIENT_VERSION_3X);
+             return CLIENT_VERSION_3X;
+         case CLIENT_VERSION_2X:
+             log.info("Application has rolled-back, initialize with {}", CLIENT_VERSION_2X);
+             return persistedVersion;
+         case CLIENT_VERSION_UPGRADE_FROM_2X:
+             log.info("Application is upgrading, initialize with {}", CLIENT_VERSION_UPGRADE_FROM_2X);
+             return persistedVersion;
+         case CLIENT_VERSION_3X:
+             log.info("Initialize with {}", CLIENT_VERSION_3X);
+             return persistedVersion;
+         default:
+             throw new IllegalStateException(String.format("Unknown version in DDB %s", migrationState));
+     }
+ }
+
+ /**
+  * Update the migration state's client version in dynamo conditional on the current client version
+  * in dynamo. So that if another worker updates the value first, the update fails. If the update fails,
+  * the method will read the latest value and return so that initialization can be retried.
+  * If the value does not exist in dynamo, it will create it.
+  *
+  * <p>The update is applied to a copy of {@code migrationState}. The instance passed in is left
+  * untouched, so when the DDB call throws, the value returned still carries the previous client
+  * version and the caller retries instead of treating the failed write as a success.
+  *
+  * @param migrationState state last read from DDB (or the CLIENT_VERSION_INIT placeholder)
+  * @param nextClientVersion client version to persist
+  * @return the persisted state on success; the latest state from DDB if the conditional write did
+  *         not take effect; the unmodified {@code migrationState} if the DDB call threw
+  * @throws InvalidStateException propagated from the DAO call or from re-reading the state
+  */
+ private MigrationState updateMigrationStateInDynamo(
+         final MigrationState migrationState, final ClientVersion nextClientVersion) throws InvalidStateException {
+     try {
+         final MigrationState updatedState = migrationState.copy().update(nextClientVersion, workerIdentifier);
+         if (migrationState.getClientVersion() == ClientVersion.CLIENT_VERSION_INIT) {
+             log.info("Creating {}", updatedState);
+             final boolean created = coordinatorStateDAO.createCoordinatorStateIfNotExists(updatedState);
+             if (!created) {
+                 log.debug("Create {} did not succeed", updatedState);
+                 return getMigrationStateFromDynamo();
+             }
+         } else {
+             log.info("Updating {} with {}", migrationState, nextClientVersion);
+             // The expectation is built from the state as read from DDB, not from the updated copy.
+             final Map<String, ExpectedAttributeValue> expectations =
+                     migrationState.getDynamoClientVersionExpectation();
+             final boolean updated =
+                     coordinatorStateDAO.updateCoordinatorStateWithExpectation(updatedState, expectations);
+             if (!updated) {
+                 log.debug("Update {} did not succeed", updatedState);
+                 return getMigrationStateFromDynamo();
+             }
+         }
+         return updatedState;
+     } catch (final ProvisionedThroughputException | DependencyException e) {
+         log.debug(
+                 "Failed to update migration state {} with {}, return previous value to trigger a retry",
+                 migrationState,
+                 nextClientVersion,
+                 e);
+         return migrationState;
+     }
+ }
+
+ /**
+  * Translate the configured {@link ClientVersionConfig} into the client version to start with
+  * when no MigrationState exists yet.
+  *
+  * @return CLIENT_VERSION_UPGRADE_FROM_2X for the 2.x compatible config, CLIENT_VERSION_3X for the 3.x config
+  * @throws IllegalStateException if the configured value is neither of the two
+  */
+ private ClientVersion getNextClientVersionBasedOnConfigVersion() {
+     switch (clientVersionConfig) {
+         case CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X:
+             return CLIENT_VERSION_UPGRADE_FROM_2X;
+         case CLIENT_VERSION_CONFIG_3X:
+             return CLIENT_VERSION_3X;
+         default:
+             throw new IllegalStateException(
+                     String.format("Unknown configured Client version %s", clientVersionConfig));
+     }
+ }
+
+ /**
+  * Read the current {@link MigrationState} from DDB with retries.
+  * @return current Migration state from DDB, if none exists, an initial Migration State with CLIENT_VERSION_INIT
+  * will be returned
+  * @throws InvalidStateException if the stored item is not a MigrationState, or if the DAO call throws it
+  * (per the original note, this happens when the dynamo table does not exist); it is never retried.
+  */
+ private MigrationState getMigrationStateFromDynamo() throws InvalidStateException {
+     return executeCallableWithRetryAndJitter(
+             () -> {
+                 final CoordinatorState state = coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY);
+                 if (state == null) {
+                     log.info("No Migration state available in DDB");
+                     return new MigrationState(MIGRATION_HASH_KEY, workerIdentifier);
+                 }
+                 if (state instanceof MigrationState) {
+                     log.info("Current migration state in DDB {}", state);
+                     return (MigrationState) state;
+                 }
+                 throw new InvalidStateException(
+                         String.format("Unexpected state found not conforming to MigrationState schema %s", state));
+             },
+             "get MigrationState from DDB");
+ }
+
+ /**
+  * Helper method to retry a given callable up to MAX_INITIALIZATION_RETRY times for all retryable exceptions.
+  * It considers InvalidStateException as non-retryable exception. During retry, it will compute a delay
+  * with jitter before retrying.
+  * @param callable callable to invoke either until it succeeds or max retry attempts exceed.
+  * @param description a meaningful description to log exceptions
+  * @return the value returned by the callable
+  * @param <T> Return type of the callable
+  * @throws InvalidStateException If the callable throws InvalidStateException, it will not be retried and will
+  * be thrown back.
+  * @throws RuntimeException if every attempt failed; the last failure is attached as the cause.
+  */
+ private <T> T executeCallableWithRetryAndJitter(final Callable<T> callable, final String description)
+         throws InvalidStateException {
+     Exception lastFailure = null;
+     int retryCount = 0;
+     while (retryCount++ < MAX_INITIALIZATION_RETRY) {
+         try {
+             return callable.call();
+         } catch (final InvalidStateException e) {
+             // throw the non-retryable exception
+             throw e;
+         } catch (final Exception e) {
+             lastFailure = e;
+             final long delay = getInitializationRetryDelay();
+             log.warn("Failed to {}, retry after delay {}", description, delay, e);
+
+             safeSleep(delay);
+         }
+     }
+     throw new RuntimeException(
+             String.format("Failed to %s after %d retries, giving up", description, MAX_INITIALIZATION_RETRY),
+             lastFailure);
+ }
+
+ /**
+  * Sleep for the given delay without propagating {@link InterruptedException}. When interrupted,
+  * the thread's interrupt flag is restored so callers further up can still observe the interruption.
+  *
+  * @param delay time to sleep in milliseconds
+  */
+ private void safeSleep(final long delay) {
+     try {
+         Thread.sleep(delay);
+     } catch (final InterruptedException ie) {
+         log.debug("Interrupted sleep during state machine initialization retry");
+         // Thread.sleep clears the flag when it throws; set it again instead of losing the signal.
+         Thread.currentThread().interrupt();
+     }
+ }
+
+ /**
+  * Compute the retry delay: the base interval plus a random jitter of up to JITTER_FACTOR of it.
+  * @return delay in milliseconds, including jitter
+  */
+ private long getInitializationRetryDelay() {
+     final double jitterFraction = random.nextDouble() * JITTER_FACTOR;
+     return INITIALIZATION_RETRY_DELAY_MILLIS + (long) (jitterFraction * INITIALIZATION_RETRY_DELAY_MILLIS);
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionUpgradeFrom2xState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionUpgradeFrom2xState.java
new file mode 100644
index 000000000..86106a079
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionUpgradeFrom2xState.java
@@ -0,0 +1,241 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ScheduledExecutorService;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
+import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
+import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
+import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
+import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
+
+/**
+ * State for CLIENT_VERSION_UPGRADE_FROM_2X. When state machine enters this state,
+ * KCL is initialized to operate in dual mode for Lease assignment and Leader decider algorithms
+ * which initially start in 2.x compatible mode and when all the KCL workers are 3.x compliant,
+ * it dynamically switches to the 3.x algorithms. It also monitors for rollback
+ * initiated from customer via the KCL migration tool and instantly switches back to the 2.x
+ * compliant algorithms.
+ * The allowed state transitions are to CLIENT_VERSION_3X_WITH_ROLLBACK when KCL workers are
+ * 3.x compliant, and to CLIENT_VERSION_2X when customer has initiated a rollback.
+ * Only the leader KCL worker performs migration ready monitor and notifies all workers (including
+ * itself) via a MigrationState update. When a worker's monitor (including the leader's own) notices
+ * the MigrationState change, that worker transitions to CLIENT_VERSION_3X_WITH_ROLLBACK.
+ */
+@KinesisClientInternalApi
+@RequiredArgsConstructor
+@Slf4j
+@ThreadSafe
+public class MigrationClientVersionUpgradeFrom2xState implements MigrationClientVersionState {
+ private final MigrationStateMachine stateMachine;
+ private final Callable timeProvider;
+ private final CoordinatorStateDAO coordinatorStateDAO;
+ private final ScheduledExecutorService stateMachineThreadPool;
+ private final DynamicMigrationComponentsInitializer initializer;
+ private final Random random;
+ private final MigrationState currentMigrationState;
+ private final long flipTo3XStabilizerTimeInSeconds;
+
+ private MigrationReadyMonitor migrationMonitor;
+ private ClientVersionChangeMonitor clientVersionChangeMonitor;
+ private boolean entered = false;
+ private boolean left = false;
+
+ /**
+ * @return the client version this state implements, always CLIENT_VERSION_UPGRADE_FROM_2X.
+ */
+ @Override
+ public ClientVersion clientVersion() {
+ return ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
+ }
+
+ /**
+  * Initializes the upgrade-from-2.x components and starts the two monitors of this state: the
+  * migration ready monitor, which calls back {@code onMigrationReady}, and the client version
+  * change monitor, which calls back {@code onClientVersionChange}. Calls made while the state is
+  * already marked as entered only log and return.
+  *
+  * @param fromClientVersion client version of the state being transitioned from
+  * @throws DependencyException propagated from the component initialization
+  */
+ @Override
+ public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
+     if (entered) {
+         log.info("Not entering {}", left ? "already exited state" : "already entered state");
+         return;
+     }
+
+     log.info("Entering state {} from {}", this, fromClientVersion);
+     initializer.initializeClientVersionForUpgradeFrom2x(fromClientVersion);
+
+     log.info("Starting migration ready monitor to monitor 3.x compliance of the KCL workers");
+     final MigrationReadyMonitor readyMonitor = new MigrationReadyMonitor(
+             initializer.metricsFactory(),
+             timeProvider,
+             initializer.leaderDecider(),
+             initializer.workerIdentifier(),
+             initializer.workerMetricsDAO(),
+             initializer.workerMetricsExpirySeconds(),
+             initializer.leaseRefresher(),
+             stateMachineThreadPool,
+             this::onMigrationReady,
+             flipTo3XStabilizerTimeInSeconds);
+     migrationMonitor = readyMonitor;
+     readyMonitor.startMonitor();
+
+     log.info("Starting monitor for rollback and flip to 3.x");
+     final ClientVersionChangeMonitor versionMonitor = new ClientVersionChangeMonitor(
+             initializer.metricsFactory(),
+             coordinatorStateDAO,
+             stateMachineThreadPool,
+             this::onClientVersionChange,
+             clientVersion(),
+             random);
+     clientVersionChangeMonitor = versionMonitor;
+     versionMonitor.startMonitor();
+
+     entered = true;
+ }
+
+ /**
+  * Cancels both monitors of this state and marks the state as exited, so that a later call
+  * can report that the state was already left.
+  */
+ @Override
+ public synchronized void leave() {
+     if (entered && !left) {
+         log.info("Leaving {}", this);
+         cancelMigrationReadyMonitor();
+         cancelClientChangeVersionMonitor();
+         entered = false;
+         left = true;
+     } else {
+         // "left" tells the two failure reasons apart; "entered" is already false after leaving.
+         log.info("Cannot leave {}", left ? "already exited state" : "because state is not active");
+     }
+ }
+
+ /**
+ * @return the simple class name, used when this state is logged.
+ */
+ @Override
+ public String toString() {
+ return getClass().getSimpleName();
+ }
+
+ /**
+  * Callback of the migration ready monitor (per the original note, invoked on the leader worker
+  * only). Writes CLIENT_VERSION_3X_WITH_ROLLBACK to DDB and leaves the actual state transition to
+  * the client version change callback, so leader and non-leader workers all transition when they
+  * discover the update in DDB.
+  */
+ private synchronized void onMigrationReady() {
+     final boolean stateActive = entered && !left && migrationMonitor != null;
+     if (!stateActive) {
+         log.info("Ignoring migration ready monitor, state already transitioned");
+         return;
+     }
+     final boolean toggled = updateDynamoStateForTransition();
+     if (toggled) {
+         // The state was toggled in DDB, so the monitor has done its job.
+         cancelMigrationReadyMonitor();
+     }
+     // Otherwise either the migration ready monitor retries, or the client version
+     // change callback initiates the next state transition.
+ }
+
+ /**
+  * Detaches the migration ready monitor and cancels it on a background task. Does nothing when
+  * no monitor is attached.
+  */
+ private void cancelMigrationReadyMonitor() {
+     final MigrationReadyMonitor monitorToCancel = migrationMonitor;
+     if (monitorToCancel == null) {
+         return;
+     }
+     // The async task only touches the captured local, so the field can be cleared right after.
+     CompletableFuture.runAsync(() -> {
+         log.info("Cancelling migration ready monitor");
+         monitorToCancel.cancel();
+     });
+     migrationMonitor = null;
+ }
+
+ /**
+  * Detaches the client version change monitor and cancels it on a background task. Does nothing
+  * when no monitor is attached.
+  */
+ private void cancelClientChangeVersionMonitor() {
+     final ClientVersionChangeMonitor monitorToCancel = clientVersionChangeMonitor;
+     if (monitorToCancel == null) {
+         return;
+     }
+     // The async task only touches the captured local, so the field can be cleared right after.
+     CompletableFuture.runAsync(() -> {
+         log.info("Cancelling client change version monitor");
+         monitorToCancel.cancel();
+     });
+     clientVersionChangeMonitor = null;
+ }
+
+ /**
+  * Handles a client version change observed in the MigrationState stored in DDB.
+  * A change to CLIENT_VERSION_2X is a rollback and a change to CLIENT_VERSION_3X_WITH_ROLLBACK is the
+  * flip to 3.x; both cancel the migration ready monitor and ask the state machine to transition.
+  * @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_UPGRADE_FROM_2X
+  * @throws InvalidStateException during transition to the next state based on the new ClientVersion
+  * or if the new state in DDB is unexpected.
+  * @throws DependencyException propagated from the state transition
+  */
+ private synchronized void onClientVersionChange(final MigrationState newState)
+         throws InvalidStateException, DependencyException {
+     final boolean active = entered && !left;
+     if (!active) {
+         log.warn("Received client version change notification on inactive state {}", this);
+         return;
+     }
+
+     final MetricsScope metricsScope =
+             MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
+     try {
+         final ClientVersion requestedVersion = newState.getClientVersion();
+         switch (requestedVersion) {
+             case CLIENT_VERSION_2X:
+                 log.info("A rollback has been initiated for the application. Transition to {}", CLIENT_VERSION_2X);
+                 // the monitor itself is cancelled asynchronously
+                 cancelMigrationReadyMonitor();
+                 stateMachine.transitionTo(CLIENT_VERSION_2X, newState);
+                 break;
+             case CLIENT_VERSION_3X_WITH_ROLLBACK:
+                 log.info("KCL workers are v3.x compliant, transition to {}", CLIENT_VERSION_3X_WITH_ROLLBACK);
+                 cancelMigrationReadyMonitor();
+                 stateMachine.transitionTo(CLIENT_VERSION_3X_WITH_ROLLBACK, newState);
+                 break;
+             default:
+                 // Not expected. Throwing lets the monitor keep watching, so KCL stays in the current
+                 // state until a valid transition becomes possible.
+                 // There could be a split brain here because new workers use the DDB value as the source
+                 // of truth; writing CLIENT_VERSION_UPGRADE_FROM_2X back to DDB would make all workers
+                 // consistent. Modifications outside of the KCL migration tool are not expected though,
+                 // so this is kept simple and the error log below is what captures such a case.
+                 log.error("Migration state has invalid client version {}", newState);
+                 throw new InvalidStateException(String.format("Unexpected new state %s", newState));
+         }
+     } catch (final DependencyException | InvalidStateException e) {
+         metricsScope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+         throw e;
+     } finally {
+         MetricsUtil.endScope(metricsScope);
+     }
+ }
+
+ /**
+  * Conditionally writes CLIENT_VERSION_3X_WITH_ROLLBACK to the MigrationState in DDB, expecting the
+  * client version this state was constructed with. Any exception is logged, counted as a fault
+  * metric and reported as {@code false}.
+  *
+  * @return the result of the conditional update, or {@code false} if it threw
+  */
+ private boolean updateDynamoStateForTransition() {
+     final MetricsScope metricsScope =
+             MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
+     try {
+         // Work on a copy so the state captured at construction time stays usable as the expectation.
+         final MigrationState targetState = currentMigrationState.copy();
+         final MigrationState newMigrationState =
+                 targetState.update(CLIENT_VERSION_3X_WITH_ROLLBACK, initializer.workerIdentifier());
+         log.info("Updating Migration State in DDB with {} prev state {}", newMigrationState, currentMigrationState);
+         final boolean updated = coordinatorStateDAO.updateCoordinatorStateWithExpectation(
+                 newMigrationState, currentMigrationState.getDynamoClientVersionExpectation());
+         return updated;
+     } catch (final Exception e) {
+         log.warn(
+                 "Exception occurred when toggling to {}, upgradeReadyMonitor will retry the update"
+                         + " if upgrade condition is still true",
+                 CLIENT_VERSION_3X_WITH_ROLLBACK,
+                 e);
+         metricsScope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+         return false;
+     } finally {
+         MetricsUtil.endScope(metricsScope);
+     }
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationReadyMonitor.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationReadyMonitor.java
new file mode 100644
index 000000000..f306a1e35
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationReadyMonitor.java
@@ -0,0 +1,354 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import java.time.Duration;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.LeaderDecider;
+import software.amazon.kinesis.leases.Lease;
+import software.amazon.kinesis.leases.LeaseRefresher;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.metrics.MetricsFactory;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
+import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
+
+import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
+
+/**
+ * Monitor for KCL workers 3.x readiness. This monitor is started on all workers but only
+ * executed on the leader of the fleet. The leader determines 3.x readiness if GSI of the lease
+ * table is active and all lease owners are emitting WorkerMetricStats. The monitor performs this
+ * check periodically and will invoke callback if the readiness conditions are true. Monitor
+ * needs to be explicitly cancelled after the readiness trigger has successfully been handled.
+ *
+ * Thread safety - Guard for safety against public method invocation and internal runnable method.
+ */
+@Slf4j
+@ThreadSafe
+@KinesisClientInternalApi
+public class MigrationReadyMonitor implements Runnable {
+ private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis();
+ private static final long LOG_INTERVAL_NANOS = Duration.ofMinutes(5).toNanos();
+
+ /**
+ * Default retry attempt for loading leases and workers before giving up.
+ */
+ private static final int DDB_LOAD_RETRY_ATTEMPT = 1;
+
+ private final MetricsFactory metricsFactory;
+ private final Callable timeProvider;
+ private final LeaderDecider leaderDecider;
+ private final String currentWorkerId;
+ private final WorkerMetricStatsDAO workerMetricStatsDAO;
+ private final long workerMetricStatsExpirySeconds;
+ private final LeaseRefresher leaseRefresher;
+ private final ScheduledExecutorService stateMachineThreadPool;
+ private final MonitorTriggerStabilizer triggerStabilizer;
+
+ private final LogRateLimiter rateLimitedStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS);
+ private ScheduledFuture> scheduledFuture;
+ private boolean gsiStatusReady;
+ private boolean workerMetricsReady;
+ private Set lastKnownUniqueLeaseOwners = new HashSet<>();
+ private Set lastKnownWorkersWithActiveWorkerMetrics = new HashSet<>();
+
+ public MigrationReadyMonitor(
+ final MetricsFactory metricsFactory,
+ final Callable timeProvider,
+ final LeaderDecider leaderDecider,
+ final String currentWorkerId,
+ final WorkerMetricStatsDAO workerMetricStatsDAO,
+ final long workerMetricsExpirySeconds,
+ final LeaseRefresher leaseRefresher,
+ final ScheduledExecutorService stateMachineThreadPool,
+ final Runnable callback,
+ final long callbackStabilizationInSeconds) {
+ this.metricsFactory = metricsFactory;
+ this.timeProvider = timeProvider;
+ this.leaderDecider = leaderDecider;
+ this.currentWorkerId = currentWorkerId;
+ this.workerMetricStatsDAO = workerMetricStatsDAO;
+ this.workerMetricStatsExpirySeconds = workerMetricsExpirySeconds;
+ this.leaseRefresher = leaseRefresher;
+ this.stateMachineThreadPool = stateMachineThreadPool;
+ this.triggerStabilizer =
+ new MonitorTriggerStabilizer(timeProvider, callbackStabilizationInSeconds, callback, currentWorkerId);
+ }
+
+ public synchronized void startMonitor() {
+ if (Objects.isNull(scheduledFuture)) {
+
+ log.info("Starting migration ready monitor");
+ scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay(
+ this, MONITOR_INTERVAL_MILLIS, MONITOR_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
+ } else {
+ log.info("Ignoring monitor request, since it is already started");
+ }
+ }
+
+ /**
+ * Cancel the monitor. Once the method returns callback will not be invoked,
+ * but callback can be invoked reentrantly before this method returns.
+ */
+ public synchronized void cancel() {
+ if (Objects.nonNull(scheduledFuture)) {
+ log.info("Cancelled migration ready monitor");
+ scheduledFuture.cancel(true);
+ scheduledFuture = null;
+ } else {
+ log.info("{} is currently not active", this);
+ }
+ }
+
+ @Override
+ public synchronized void run() {
+ try {
+ if (Thread.currentThread().isInterrupted()) {
+ log.info("{} cancelled, exiting...", this);
+ return;
+ }
+ if (!leaderDecider.isLeader(currentWorkerId)) {
+ log.debug("Not the leader, not performing migration ready check {}", this);
+ triggerStabilizer.reset();
+ lastKnownUniqueLeaseOwners.clear();
+ lastKnownWorkersWithActiveWorkerMetrics.clear();
+ return;
+ }
+
+ triggerStabilizer.call(isReadyForUpgradeTo3x());
+ rateLimitedStatusLogger.log(() -> log.info("Monitor ran successfully {}", this));
+ } catch (final Throwable t) {
+ log.warn("{} failed, will retry after {}", this, MONITOR_INTERVAL_MILLIS, t);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return new StringBuilder("UpgradeReadyMonitor[")
+ .append("G=")
+ .append(gsiStatusReady)
+ .append(",W=")
+ .append(workerMetricsReady)
+ .append("]")
+ .toString();
+ }
+
+ private boolean isReadyForUpgradeTo3x() throws DependencyException {
+ final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
+ try {
+ // If GSI is not ready, optimize to not check if worker metrics are being emitted
+ final boolean localGsiReadyStatus = leaseRefresher.isLeaseOwnerToLeaseKeyIndexActive();
+ if (localGsiReadyStatus != gsiStatusReady) {
+ gsiStatusReady = localGsiReadyStatus;
+ log.info("Gsi ready status changed to {}", gsiStatusReady);
+ } else {
+ log.debug("GsiReady status {}", gsiStatusReady);
+ }
+ return gsiStatusReady && areLeaseOwnersEmittingWorkerMetrics();
+ } finally {
+ scope.addData("GsiReadyStatus", gsiStatusReady ? 1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+ scope.addData(
+ "WorkerMetricsReadyStatus", workerMetricsReady ? 1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+ MetricsUtil.endScope(scope);
+ }
+ }
+
+ private boolean areLeaseOwnersEmittingWorkerMetrics() {
+ final CompletableFuture> leaseListFuture = loadLeaseListAsync();
+ final CompletableFuture> workerMetricsFuture = loadWorkerMetricStats();
+
+ final List leaseList = leaseListFuture.join();
+ final Set leaseOwners = getUniqueLeaseOwnersFromLeaseTable(leaseList);
+ final List workerMetricStatsList = workerMetricsFuture.join();
+ final Set workersWithActiveWorkerMetrics = getWorkersWithActiveWorkerMetricStats(workerMetricStatsList);
+
+ // Leases are not checked for expired condition because:
+ // If some worker has gone down and is not active, but has lease assigned to it, those leases
+ // maybe expired. Since the worker is down, it may not have worker-metrics, or worker-metrics may not be active,
+ // In that case, the migration condition is not considered to be met.
+ // However, those leases should be assigned to another worker and so the check in the next
+ // iteration could succeed. This is intentional to make sure all leases owners are accounted for
+ // and the old owner does not come back up without worker metrics and reacquires the lease.
+ final boolean localWorkerMetricsReady = leaseOwners.equals(workersWithActiveWorkerMetrics);
+ if (localWorkerMetricsReady != workerMetricsReady) {
+ workerMetricsReady = localWorkerMetricsReady;
+ log.info("WorkerMetricStats status changed to {}", workerMetricsReady);
+ log.info("Lease List {}", leaseList);
+ log.info("WorkerMetricStats {}", workerMetricStatsList);
+ } else {
+ log.debug("WorkerMetricStats ready status {}", workerMetricsReady);
+ }
+
+ if (lastKnownUniqueLeaseOwners == null) {
+ log.info("Unique lease owners {}", leaseOwners);
+ } else if (!lastKnownUniqueLeaseOwners.equals(leaseOwners)) {
+ log.info("Unique lease owners changed to {}", leaseOwners);
+ }
+ lastKnownUniqueLeaseOwners = leaseOwners;
+
+ if (lastKnownWorkersWithActiveWorkerMetrics == null) {
+ log.info("Workers with active worker metric stats {}", workersWithActiveWorkerMetrics);
+ } else if (!lastKnownWorkersWithActiveWorkerMetrics.equals(workersWithActiveWorkerMetrics)) {
+ log.info("Workers with active worker metric stats changed {}", workersWithActiveWorkerMetrics);
+ }
+ lastKnownWorkersWithActiveWorkerMetrics = workersWithActiveWorkerMetrics;
+
+ return workerMetricsReady;
+ }
+
+ private Set getUniqueLeaseOwnersFromLeaseTable(final List leaseList) {
+ return leaseList.stream().map(Lease::leaseOwner).collect(Collectors.toSet());
+ }
+
+ private Set getWorkersWithActiveWorkerMetricStats(final List workerMetricStats) {
+ final long nowInSeconds = Duration.ofMillis(now(timeProvider)).getSeconds();
+ return workerMetricStats.stream()
+ .filter(metricStats -> isWorkerMetricStatsActive(metricStats, nowInSeconds))
+ .map(WorkerMetricStats::getWorkerId)
+ .collect(Collectors.toSet());
+ }
+
+ private boolean isWorkerMetricStatsActive(final WorkerMetricStats metricStats, final long nowInSeconds) {
+ return (metricStats.getLastUpdateTime() + workerMetricStatsExpirySeconds) > nowInSeconds;
+ }
+
+ private CompletableFuture> loadWorkerMetricStats() {
+ return CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricStatsDAO::getAllWorkerMetricStats));
+ }
+
+ private CompletableFuture> loadLeaseListAsync() {
+ return CompletableFuture.supplyAsync(() -> loadWithRetry(leaseRefresher::listLeases));
+ }
+
+ private T loadWithRetry(final Callable loadFunction) {
+ int retryAttempt = 0;
+ while (true) {
+ try {
+ return loadFunction.call();
+ } catch (final Exception e) {
+ if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) {
+ log.warn(
+ "Failed to load : {}, retrying",
+ loadFunction.getClass().getName(),
+ e);
+ retryAttempt++;
+ } else {
+ throw new CompletionException(e);
+ }
+ }
+ }
+ }
+
+ private static long now(final Callable timeProvider) {
+ try {
+ return timeProvider.call();
+ } catch (final Exception e) {
+ log.debug("Time provider threw exception, using System.currentTimeMillis", e);
+ return System.currentTimeMillis();
+ }
+ }
+
+ /**
+ * Stabilize the monitor trigger before invoking the callback
+ * to ensure we are consistently seeing the trigger for a configured
+ * stabilizationDurationInMillis
+ */
+ private static class MonitorTriggerStabilizer {
+ private final Callable timeProvider;
+ private final long stabilizationDurationInSeconds;
+ private final Runnable callback;
+ private final String currentWorkerId;
+ private final LogRateLimiter rateLimitedTriggerStatusLogger;
+
+ private long lastToggleTimeInMillis;
+ private boolean currentTriggerStatus;
+
+ public MonitorTriggerStabilizer(
+ final Callable timeProvider,
+ final long stabilizationDurationInSeconds,
+ final Runnable callback,
+ final String currentWorkerId) {
+ this.timeProvider = timeProvider;
+ this.stabilizationDurationInSeconds = stabilizationDurationInSeconds;
+ this.callback = callback;
+ this.currentWorkerId = currentWorkerId;
+ this.rateLimitedTriggerStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS);
+ }
+
+ public void call(final boolean isMonitorTriggered) {
+ final long now = now(timeProvider);
+ if (currentTriggerStatus != isMonitorTriggered) {
+ log.info("Trigger status has changed to {}", isMonitorTriggered);
+ currentTriggerStatus = isMonitorTriggered;
+ lastToggleTimeInMillis = now;
+ }
+
+ if (currentTriggerStatus) {
+ final long deltaSeconds =
+ Duration.ofMillis(now - lastToggleTimeInMillis).getSeconds();
+ if (deltaSeconds >= stabilizationDurationInSeconds) {
+ log.info("Trigger has been consistently true for {}s, invoking callback", deltaSeconds);
+ callback.run();
+ } else {
+ rateLimitedTriggerStatusLogger.log(() -> log.info(
+ "Trigger has been true for {}s, waiting for stabilization time of {}s",
+ deltaSeconds,
+ stabilizationDurationInSeconds));
+ }
+ }
+ }
+
+ public void reset() {
+ if (currentTriggerStatus) {
+ log.info("This worker {} is no longer the leader, reset current status", currentWorkerId);
+ }
+ currentTriggerStatus = false;
+ }
+ }
+
+ @RequiredArgsConstructor
+ private static class LogRateLimiter {
+ private final long logIntervalInNanos;
+
+ private long nextLogTime = System.nanoTime();
+
+ public void log(final Runnable logger) {
+ final long now = System.nanoTime();
+ if (now >= nextLogTime) {
+ logger.run();
+ nextLogTime = now + logIntervalInNanos;
+ }
+ }
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationState.java
new file mode 100644
index 000000000..dcdecad07
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationState.java
@@ -0,0 +1,248 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.coordinator.migration;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+import lombok.ToString;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.services.dynamodb.model.AttributeAction;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
+import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate;
+import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.common.StackTraceUtils;
+import software.amazon.kinesis.coordinator.CoordinatorState;
+
+/**
+ * Data model of the Migration state. This is used to track the state related to migration
+ * from KCLv2.x to KCLv3.x.
+ */
+@Getter
+@ToString(callSuper = true)
+@Slf4j
+@KinesisClientInternalApi
+public class MigrationState extends CoordinatorState {
+ /**
+ * Key value for the item in the CoordinatorState table
+ */
+ public static final String MIGRATION_HASH_KEY = "Migration3.0";
+ /**
+ * Attribute name in migration state item, whose value is used during
+ * the KCL v3.x migration process to know whether the workers need to
+ * perform KCL v2.x compatible operations or can perform native KCL v3.x
+ * operations.
+ */
+ public static final String CLIENT_VERSION_ATTRIBUTE_NAME = "cv";
+
+ public static final String MODIFIED_BY_ATTRIBUTE_NAME = "mb";
+ public static final String MODIFIED_TIMESTAMP_ATTRIBUTE_NAME = "mts";
+ public static final String HISTORY_ATTRIBUTE_NAME = "h";
+ private static final int MAX_HISTORY_ENTRIES = 10;
+
+ private ClientVersion clientVersion;
+ private String modifiedBy;
+ private long modifiedTimestamp;
+ private final List history;
+
+ private MigrationState(
+ final String key,
+ final ClientVersion clientVersion,
+ final String modifiedBy,
+ final long modifiedTimestamp,
+ final List historyEntries,
+ final Map others) {
+ setKey(key);
+ setAttributes(others);
+ this.clientVersion = clientVersion;
+ this.modifiedBy = modifiedBy;
+ this.modifiedTimestamp = modifiedTimestamp;
+ this.history = historyEntries;
+ }
+
+ public MigrationState(final String key, final String modifiedBy) {
+ this(
+ key,
+ ClientVersion.CLIENT_VERSION_INIT,
+ modifiedBy,
+ System.currentTimeMillis(),
+ new ArrayList<>(),
+ new HashMap<>());
+ }
+
+ public HashMap serialize() {
+ final HashMap result = new HashMap<>();
+ result.put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(clientVersion.name()));
+ result.put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(modifiedBy));
+ result.put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(modifiedTimestamp)));
+
+ if (!history.isEmpty()) {
+ final List historyList = new ArrayList<>();
+ for (final HistoryEntry entry : history) {
+ historyList.add(AttributeValue.builder().m(entry.serialize()).build());
+ }
+ result.put(
+ HISTORY_ATTRIBUTE_NAME,
+ AttributeValue.builder().l(historyList).build());
+ }
+
+ return result;
+ }
+
+ public static MigrationState deserialize(final String key, final HashMap attributes) {
+ if (!MIGRATION_HASH_KEY.equals(key)) {
+ return null;
+ }
+
+ try {
+ final HashMap mutableAttributes = new HashMap<>(attributes);
+ final ClientVersion clientVersion = ClientVersion.valueOf(
+ mutableAttributes.remove(CLIENT_VERSION_ATTRIBUTE_NAME).s());
+ final String modifiedBy =
+ mutableAttributes.remove(MODIFIED_BY_ATTRIBUTE_NAME).s();
+ final long modifiedTimestamp = Long.parseLong(
+ mutableAttributes.remove(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n());
+
+ final List historyList = new ArrayList<>();
+ if (attributes.containsKey(HISTORY_ATTRIBUTE_NAME)) {
+ mutableAttributes.remove(HISTORY_ATTRIBUTE_NAME).l().stream()
+ .map(historyEntry -> HistoryEntry.deserialize(historyEntry.m()))
+ .forEach(historyList::add);
+ }
+ final MigrationState migrationState = new MigrationState(
+ MIGRATION_HASH_KEY, clientVersion, modifiedBy, modifiedTimestamp, historyList, mutableAttributes);
+
+ if (!mutableAttributes.isEmpty()) {
+ log.info("Unknown attributes {} for state {}", mutableAttributes, migrationState);
+ }
+ return migrationState;
+
+ } catch (final Exception e) {
+ log.warn("Unable to deserialize state with key {} and attributes {}", key, attributes, e);
+ }
+ return null;
+ }
+
+ public Map getDynamoClientVersionExpectation() {
+ return new HashMap() {
+ {
+ put(
+ CLIENT_VERSION_ATTRIBUTE_NAME,
+ ExpectedAttributeValue.builder()
+ .value(AttributeValue.fromS(clientVersion.name()))
+ .build());
+ }
+ };
+ }
+
+ public MigrationState copy() {
+ return new MigrationState(
+ getKey(),
+ getClientVersion(),
+ getModifiedBy(),
+ getModifiedTimestamp(),
+ new ArrayList<>(getHistory()),
+ new HashMap<>(getAttributes()));
+ }
+
+ public MigrationState update(final ClientVersion clientVersion, final String modifiedBy) {
+ log.info(
+ "Migration state is being updated to {} current state {} caller {}",
+ clientVersion,
+ this,
+ StackTraceUtils.getPrintableStackTrace(Thread.currentThread().getStackTrace()));
+ addHistoryEntry(this.clientVersion, this.modifiedBy, this.modifiedTimestamp);
+ this.clientVersion = clientVersion;
+ this.modifiedBy = modifiedBy;
+ this.modifiedTimestamp = System.currentTimeMillis();
+ return this;
+ }
+
+ public void addHistoryEntry(
+ final ClientVersion lastClientVersion, final String lastModifiedBy, final long lastModifiedTimestamp) {
+ history.add(0, new HistoryEntry(lastClientVersion, lastModifiedBy, lastModifiedTimestamp));
+ if (history.size() > MAX_HISTORY_ENTRIES) {
+ log.info("Limit {} reached, dropping history {}", MAX_HISTORY_ENTRIES, history.remove(history.size() - 1));
+ }
+ }
+
+ public Map getDynamoUpdate() {
+ final HashMap updates = new HashMap<>();
+ updates.put(
+ CLIENT_VERSION_ATTRIBUTE_NAME,
+ AttributeValueUpdate.builder()
+ .value(AttributeValue.fromS(clientVersion.name()))
+ .action(AttributeAction.PUT)
+ .build());
+ updates.put(
+ MODIFIED_BY_ATTRIBUTE_NAME,
+ AttributeValueUpdate.builder()
+ .value(AttributeValue.fromS(modifiedBy))
+ .action(AttributeAction.PUT)
+ .build());
+ updates.put(
+ MODIFIED_TIMESTAMP_ATTRIBUTE_NAME,
+ AttributeValueUpdate.builder()
+ .value(AttributeValue.fromN(String.valueOf(modifiedTimestamp)))
+ .action(AttributeAction.PUT)
+ .build());
+ if (!history.isEmpty()) {
+ updates.put(
+ HISTORY_ATTRIBUTE_NAME,
+ AttributeValueUpdate.builder()
+ .value(AttributeValue.fromL(
+ history.stream().map(HistoryEntry::toAv).collect(Collectors.toList())))
+ .action(AttributeAction.PUT)
+ .build());
+ }
+ return updates;
+ }
+
+ @RequiredArgsConstructor
+ @ToString
+ public static class HistoryEntry {
+ private final ClientVersion lastClientVersion;
+ private final String lastModifiedBy;
+ private final long lastModifiedTimestamp;
+
+ public AttributeValue toAv() {
+ return AttributeValue.fromM(serialize());
+ }
+
+ public Map serialize() {
+ return new HashMap() {
+ {
+ put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(lastClientVersion.name()));
+ put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(lastModifiedBy));
+ put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(lastModifiedTimestamp)));
+ }
+ };
+ }
+
+ public static HistoryEntry deserialize(final Map map) {
+ return new HistoryEntry(
+ ClientVersion.valueOf(map.get(CLIENT_VERSION_ATTRIBUTE_NAME).s()),
+ map.get(MODIFIED_BY_ATTRIBUTE_NAME).s(),
+ Long.parseLong(map.get(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n()));
+ }
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachine.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachine.java
new file mode 100644
index 000000000..4698feb08
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachine.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+
+/**
+ * State machine that provides:
+ * 1. Seamless upgrade from 2.x to 3.x - 3.x has introduced new algorithms that are not compatible with 2.x
+ * workers, so the state machine allows to seamlessly run the 2.x functionality to be compliant with any
+ * 2.x worker in the fleet, and also seamlessly switch to 3.x functionality when all KCL workers are
+ * 3.x complaint.
+ * 2. Instant rollbacks - Rollbacks are supported using the KCL Migration tool to revert back to 2.x functionality
+ * if customer finds regressions in 3.x functionality.
+ * 3. Instant roll-forwards - Once any issue has been mitigated, rollfowards are supported instantly
+ * with KCL Migration tool.
+ */
+public interface MigrationStateMachine {
+ /**
+ * Initialize the state machine by identifying the initial state when the KCL worker comes up for the first time.
+ * @throws DependencyException When unable to identify the initial state.
+ */
+ void initialize() throws DependencyException;
+
+ /**
+ * Shutdown state machine and perform necessary cleanup for the worker to gracefully shutdown
+ */
+ void shutdown();
+
+ /**
+ * Terminate the state machine when it reaches a terminal state, which is a successful upgrade
+ * to v3.x.
+ */
+ void terminate();
+
+ /**
+ * Peform transition from current state to the given new ClientVersion
+ * @param nextClientVersion clientVersion of the new state the state machine must transition to
+ * @param state the current MigrationState in dynamo
+ * @throws InvalidStateException when transition fails, this allows the state machine to stay
+ * in the current state until a valid transition is possible
+ * @throws DependencyException when transition fails due to dependency on DDB failing in
+ * unexpected ways.
+ */
+ void transitionTo(final ClientVersion nextClientVersion, final MigrationState state)
+ throws InvalidStateException, DependencyException;
+
+ /**
+ * Get the ClientVersion of current state machine state.
+ * @return ClientVersion of current state machine state
+ */
+ ClientVersion getCurrentClientVersion();
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachineImpl.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachineImpl.java
new file mode 100644
index 000000000..96e16a0f5
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachineImpl.java
@@ -0,0 +1,254 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import java.util.AbstractMap.SimpleEntry;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import lombok.Getter;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
+import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
+import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.metrics.MetricsFactory;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+
+/**
+ * Implementation of {@link MigrationStateMachine}
+ */
+@KinesisClientInternalApi
+@Getter
+@Slf4j
+@ThreadSafe
+public class MigrationStateMachineImpl implements MigrationStateMachine {
+ // Metric name recorded with a count of 1 when a migration operation fails.
+ public static final String FAULT_METRIC = "Fault";
+ // Operation name used when creating metrics scopes for migration work.
+ public static final String METRICS_OPERATION = "Migration";
+
+ // Timeout, in seconds, passed to awaitTermination when shutting down the thread pool.
+ private static final long THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS = 5L;
+
+ private final MetricsFactory metricsFactory;
+ private final Callable timeProvider;
+ private final CoordinatorStateDAO coordinatorStateDAO;
+ private final ScheduledExecutorService stateMachineThreadPool;
+ private DynamicMigrationComponentsInitializer initializer;
+ private final ClientVersionConfig clientVersionConfig;
+ private final Random random;
+ private final String workerId;
+ private final long flipTo3XStabilizerTimeInSeconds;
+ // Migration state the machine started with; assigned at the end of a successful initialize().
+ private MigrationState startingMigrationState;
+
+ // Client version the machine started with; null until initialize() has completed,
+ // which is how initialize() detects a repeated call.
+ @Getter
+ private ClientVersion startingClientVersion;
+
+ // Placeholder state that reports CLIENT_VERSION_INIT and only logs on enter/leave.
+ // It is the current state until the first transition replaces it; terminate() sets the field to null.
+ private MigrationClientVersionState currentMigrationClientVersionState = new MigrationClientVersionState() {
+ @Override
+ public ClientVersion clientVersion() {
+ return ClientVersion.CLIENT_VERSION_INIT;
+ }
+
+ @Override
+ public void enter(final ClientVersion fromClientVersion) {
+ log.info("Entered {}...", clientVersion());
+ }
+
+ @Override
+ public void leave() {
+ log.info("Left {}...", clientVersion());
+ }
+ };
+ // Set by terminate(); once true, transitionTo() rejects further transitions.
+ private boolean terminated = false;
+
+ /**
+ * Create the state machine. The constructor only stores its arguments; no work is done
+ * until initialize() is called.
+ *
+ * @param metricsFactory metrics factory kept for later use (NOTE(review): not referenced in the visible methods)
+ * @param timeProvider time source handed to the starting-state initializer in initialize()
+ * @param coordinatorStateDAO DAO that is initialized in initialize() and handed to the starting-state initializer
+ * @param stateMachineThreadPool executor that is shut down by shutdown()
+ * @param clientVersionConfig configuration handed to the starting-state initializer
+ * @param random random source handed to the starting-state initializer
+ * @param initializer component initializer that is initialized with the starting client version
+ * @param workerId identifier of this worker, handed to the starting-state initializer
+ * @param flipTo3XStabilizerTimeInSeconds presumably the stabilization time before flipping to 3.x;
+ * not used in the visible methods - TODO confirm
+ */
+ public MigrationStateMachineImpl(
+ final MetricsFactory metricsFactory,
+ final Callable timeProvider,
+ final CoordinatorStateDAO coordinatorStateDAO,
+ final ScheduledExecutorService stateMachineThreadPool,
+ final ClientVersionConfig clientVersionConfig,
+ final Random random,
+ final DynamicMigrationComponentsInitializer initializer,
+ final String workerId,
+ final long flipTo3XStabilizerTimeInSeconds) {
+ this.metricsFactory = metricsFactory;
+ this.timeProvider = timeProvider;
+ this.coordinatorStateDAO = coordinatorStateDAO;
+ this.stateMachineThreadPool = stateMachineThreadPool;
+ this.clientVersionConfig = clientVersionConfig;
+ this.random = random;
+ this.initializer = initializer;
+ this.workerId = workerId;
+ this.flipTo3XStabilizerTimeInSeconds = flipTo3XStabilizerTimeInSeconds;
+ }
+
+     /**
+      * Determine the starting client version and migration state, initialize the dynamic
+      * components for that version and transition into the matching state. Calling this again
+      * after a successful initialization only logs and returns.
+      *
+      * @throws DependencyException when the initial state cannot be determined or entered
+      */
+     @Override
+     public void initialize() throws DependencyException {
+         if (startingClientVersion != null) {
+             log.info("MigrationStateMachine already initialized with clientVersion {}", startingClientVersion);
+             return;
+         }
+
+         log.info("Initializing MigrationStateMachine");
+         coordinatorStateDAO.initialize();
+         final MigrationClientVersionStateInitializer startingStateInitializer =
+                 new MigrationClientVersionStateInitializer(
+                         timeProvider, coordinatorStateDAO, clientVersionConfig, random, workerId);
+         // Key is the client version to start in, value is the migration state it was derived from.
+         final SimpleEntry<ClientVersion, MigrationState> dataForInitialization =
+                 startingStateInitializer.getInitialState();
+         initializer.initialize(dataForInitialization.getKey());
+         transitionTo(dataForInitialization.getKey(), dataForInitialization.getValue());
+         // Only recorded after the transition succeeded, so a failed attempt can be retried.
+         startingClientVersion = dataForInitialization.getKey();
+         startingMigrationState = dataForInitialization.getValue();
+         log.info("MigrationStateMachine initial clientVersion {}", startingClientVersion);
+     }
+
+     /**
+      * Terminate the state machine and shut down its thread pool. The pool is given
+      * {@code THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS} to finish; if it has not terminated by then,
+      * or if the wait is interrupted, it is shut down forcefully.
+      */
+     @Override
+     public void shutdown() {
+         terminate();
+         if (!stateMachineThreadPool.isShutdown()) {
+             stateMachineThreadPool.shutdown();
+             try {
+                 // awaitTermination returns false when the timeout elapsed before termination.
+                 if (!stateMachineThreadPool.awaitTermination(THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
+                     log.info(
+                             "StateMachineThreadPool did not shutdown within {} seconds, forcefully shutting down",
+                             THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS);
+                     stateMachineThreadPool.shutdownNow();
+                 }
+             } catch (final InterruptedException e) {
+                 log.info("Interrupted when shutting down StateMachineThreadPool, forcefully shutting down");
+                 stateMachineThreadPool.shutdownNow();
+                 // Preserve the interrupt for the caller.
+                 Thread.currentThread().interrupt();
+             }
+         }
+         log.info("Shutdown successfully");
+     }
+
+     /**
+      * Leave the current state and mark the state machine as terminated. Has no effect when the
+      * machine is already terminated or has no current state.
+      */
+     @Override
+     public synchronized void terminate() {
+         if (terminated || currentMigrationClientVersionState == null) {
+             return;
+         }
+
+         log.info("State machine is about to terminate");
+         currentMigrationClientVersionState.leave();
+         currentMigrationClientVersionState = null;
+         log.info("State machine reached a terminal state.");
+         terminated = true;
+     }
+
+ @Override
+ public synchronized void transitionTo(final ClientVersion nextClientVersion, final MigrationState migrationState)
+ throws DependencyException {
+ if (terminated) {
+ throw new IllegalStateException(String.format(
+ "Cannot transition to %s after state machine is terminated, %s",
+ nextClientVersion.name(), migrationState));
+ }
+
+ final MigrationClientVersionState nextMigrationClientVersionState =
+ createMigrationClientVersionState(nextClientVersion, migrationState);
+ log.info(
+ "Attempting to transition from {} to {}",
+ currentMigrationClientVersionState.clientVersion(),
+ nextClientVersion);
+ currentMigrationClientVersionState.leave();
+
+ enter(nextMigrationClientVersionState);
+ }
+
+ /**
+ * Enter with retry. When entering the state machine for the first time, the caller has retry so exceptions
+ * will be re-thrown. Once the state machine has initialized all transitions will be an indefinite retry.
+ * It is possible the DDB state has changed by the time enter succeeds but that will occur as a new
+ * state transition after entering the state. Usually the failures are due to unexpected issues with
+ * DDB which will be transient and will recover on a retry.
+ * @param nextMigrationClientVersionState the state to transition to
+ * @throws DependencyException If entering fails during state machine initialization.
+ */
+ private void enter(final MigrationClientVersionState nextMigrationClientVersionState) throws DependencyException {
+ boolean success = false;
+ while (!success) {
+ try {
+ // Enter should never fail unless it is the starting state and fails to create the GSI,
+ // in which case it is an unrecoverable error that is bubbled up and KCL start up will fail.
+ nextMigrationClientVersionState.enter(currentMigrationClientVersionState.clientVersion());
+
+ currentMigrationClientVersionState = nextMigrationClientVersionState;
+ log.info("Successfully transitioned to {}", nextMigrationClientVersionState.clientVersion());
+ if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_3X) {
+ terminate();
+ }
+ success = true;
+ } catch (final DependencyException e) {
+ if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_INIT) {
+ throw e;
+ }
+ log.info(
+ "Transitioning from {} to {} failed, retrying after 1 second",
+ currentMigrationClientVersionState.clientVersion(),
+ nextMigrationClientVersionState.clientVersion(),
+ e);
+
+ final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
+ scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+ MetricsUtil.endScope(scope);
+
+ try {
+ Thread.sleep(1000);
+ } catch (final InterruptedException ie) { // only logged; the retry loop keeps running
+ log.info("Interrupted while sleeping before retrying state machine transition", ie);
+ }
+ }
+ }
+ }
+
+ private MigrationClientVersionState createMigrationClientVersionState(
+ final ClientVersion clientVersion, final MigrationState migrationState) {
+ switch (clientVersion) {
+ case CLIENT_VERSION_2X:
+ return new MigrationClientVersion2xState(
+ this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
+ case CLIENT_VERSION_UPGRADE_FROM_2X:
+ return new MigrationClientVersionUpgradeFrom2xState(
+ this,
+ timeProvider,
+ coordinatorStateDAO,
+ stateMachineThreadPool,
+ initializer,
+ random,
+ migrationState,
+ flipTo3XStabilizerTimeInSeconds);
+ case CLIENT_VERSION_3X_WITH_ROLLBACK:
+ return new MigrationClientVersion3xWithRollbackState(
+ this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
+ case CLIENT_VERSION_3X:
+ return new MigrationClientVersion3xState(this, initializer);
+ }
+ throw new IllegalStateException(String.format("Unknown client version %s", clientVersion));
+ }
+
+ public ClientVersion getCurrentClientVersion() {
+ // Read the field once: terminate() may null it between a null check and the dereference.
+ final MigrationClientVersionState state = currentMigrationClientVersionState;
+ if (state != null || terminated) {
+ return state != null ? state.clientVersion() : ClientVersion.CLIENT_VERSION_3X;
+ }
+ throw new UnsupportedOperationException(
+ "No current state when state machine is either not initialized" + " or already terminated");
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/DynamoDBLockBasedLeaderDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/DynamoDBLockBasedLeaderDecider.java
new file mode 100644
index 000000000..710637e54
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/DynamoDBLockBasedLeaderDecider.java
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.leader;
+
+import java.time.Duration;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import com.amazonaws.services.dynamodbv2.AcquireLockOptions;
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClient;
+import com.amazonaws.services.dynamodbv2.LockItem;
+import com.amazonaws.services.dynamodbv2.model.LockCurrentlyUnavailableException;
+import com.google.common.annotations.VisibleForTesting;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
+import software.amazon.kinesis.coordinator.LeaderDecider;
+import software.amazon.kinesis.metrics.MetricsFactory;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+
+import static software.amazon.kinesis.coordinator.CoordinatorState.LEADER_HASH_KEY;
+
+/**
+ * Implementation for LeaderDecider to elect leader using lock on dynamo db table. This class uses
+ * AmazonDynamoDBLockClient library to perform the leader election.
+ */
+@RequiredArgsConstructor
+@Slf4j
+@KinesisClientInternalApi
+public class DynamoDBLockBasedLeaderDecider implements LeaderDecider {
+ private static final Long DEFAULT_LEASE_DURATION_MILLIS =
+ Duration.ofMinutes(2).toMillis();
+ // Heartbeat period should be at least 3 times smaller than the lease duration per the LockClient documentation
+ private static final Long DEFAULT_HEARTBEAT_PERIOD_MILLIS =
+ Duration.ofSeconds(30).toMillis();
+
+ private final CoordinatorStateDAO coordinatorStateDao;
+ private final AmazonDynamoDBLockClient dynamoDBLockClient;
+ private final Long heartbeatPeriodMillis;
+ private final String workerId;
+ private final MetricsFactory metricsFactory;
+
+ private long lastCheckTimeInMillis = 0L;
+ private boolean lastIsLeaderResult = false;
+ private final AtomicBoolean isShutdown = new AtomicBoolean(false);
+
+ @VisibleForTesting
+ static DynamoDBLockBasedLeaderDecider create(
+ final CoordinatorStateDAO coordinatorStateDao,
+ final String workerId,
+ final Long leaseDuration,
+ final Long heartbeatPeriod,
+ final MetricsFactory metricsFactory) {
+ final AmazonDynamoDBLockClient dynamoDBLockClient = new AmazonDynamoDBLockClient(coordinatorStateDao
+ .getDDBLockClientOptionsBuilder()
+ .withTimeUnit(TimeUnit.MILLISECONDS)
+ .withLeaseDuration(leaseDuration)
+ .withHeartbeatPeriod(heartbeatPeriod)
+ .withCreateHeartbeatBackgroundThread(true)
+ .withOwnerName(workerId)
+ .build());
+
+ return new DynamoDBLockBasedLeaderDecider(
+ coordinatorStateDao, dynamoDBLockClient, heartbeatPeriod, workerId, metricsFactory);
+ }
+
+ public static DynamoDBLockBasedLeaderDecider create(
+ final CoordinatorStateDAO coordinatorStateDao, final String workerId, final MetricsFactory metricsFactory) {
+ return create(
+ coordinatorStateDao,
+ workerId,
+ DEFAULT_LEASE_DURATION_MILLIS,
+ DEFAULT_HEARTBEAT_PERIOD_MILLIS,
+ metricsFactory);
+ }
+
+ @Override
+ public void initialize() {
+ log.info("Initializing DDB Lock based leader decider");
+ }
+
+ /**
+ * Check the lockItem in storage and if the current worker is not leader worker, then tries to acquire lock and
+ * returns true if it was able to acquire lock else false.
+ * @param workerId ID of the worker
+ * @return true if current worker is leader else false.
+ */
+ @Override
+ public synchronized Boolean isLeader(final String workerId) {
+ // if the decider has shutdown, then return false and don't try acquireLock anymore.
+ if (isShutdown.get()) {
+ publishIsLeaderMetrics(false);
+ return false;
+ }
+ // If the last time we tried to take lock and didn't get lock, don't try to take again for heartbeatPeriodMillis
+ // this is to avoid unnecessary calls to dynamoDB.
+ // Different modules in KCL can request for isLeader check within heartbeatPeriodMillis, and this optimization
+ // will help in those cases.
+ // In case the last call returned true, we want to check the source always to ensure the correctness of leader.
+ if (!lastIsLeaderResult && lastCheckTimeInMillis + heartbeatPeriodMillis > System.currentTimeMillis()) {
+ publishIsLeaderMetrics(lastIsLeaderResult);
+ return lastIsLeaderResult;
+ }
+ boolean response;
+ // Get the lockItem from storage (if present)
+ final Optional<LockItem> lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty());
+ lockItem.ifPresent(item -> log.info("Worker : {} is the current leader.", item.getOwnerName()));
+
+ // If the lockItem is absent or expired, no worker holds a valid lock, so try to acquire it.
+ if (!lockItem.isPresent() || lockItem.get().isExpired()) {
+ try {
+ // Current worker does not hold the lock, try to acquire one.
+ final Optional<LockItem> leaderLockItem =
+ dynamoDBLockClient.tryAcquireLock(AcquireLockOptions.builder(LEADER_HASH_KEY)
+ .withRefreshPeriod(heartbeatPeriodMillis)
+ .withTimeUnit(TimeUnit.MILLISECONDS)
+ .withShouldSkipBlockingWait(true)
+ .build());
+ leaderLockItem.ifPresent(item -> log.info("Worker : {} is new leader", item.getOwnerName()));
+ // if leaderLockItem optional is empty, that means the lock is not acquired by this worker.
+ response = leaderLockItem.isPresent();
+ } catch (final InterruptedException e) {
+ // Something bad happened, don't assume leadership and also release lock just in case the
+ // lock was granted and still interrupt happened.
+ releaseLeadershipIfHeld();
+ log.error("Acquiring lock was interrupted in between", e);
+ response = false;
+ Thread.currentThread().interrupt(); // restore the interrupt flag for the caller
+ } catch (final LockCurrentlyUnavailableException e) {
+ response = false;
+ }
+
+ } else {
+ response = lockItem.get().getOwnerName().equals(workerId);
+ }
+
+ lastCheckTimeInMillis = System.currentTimeMillis();
+ lastIsLeaderResult = response;
+ publishIsLeaderMetrics(response);
+ return response;
+ }
+
+ private void publishIsLeaderMetrics(final boolean response) {
+ final MetricsScope metricsScope =
+ MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
+ metricsScope.addData(
+ METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED);
+ MetricsUtil.endScope(metricsScope);
+ }
+
+ /**
+ * Releases the lock if held by current worker when this method is invoked.
+ */
+ @Override
+ public void shutdown() {
+ if (!isShutdown.getAndSet(true)) {
+ releaseLeadershipIfHeld();
+ }
+ }
+
+ @Override
+ public void releaseLeadershipIfHeld() {
+ try {
+ final Optional<LockItem> lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty());
+ if (lockItem.isPresent()
+ && !lockItem.get().isExpired()
+ && lockItem.get().getOwnerName().equals(workerId)) {
+
+ log.info(
+ "Current worker : {} holds the lock, releasing it.",
+ lockItem.get().getOwnerName());
+ // LockItem.close() will release the lock if current worker owns it else this call is no op.
+ lockItem.get().close();
+ }
+ } catch (final Exception e) {
+ log.error("Failed to complete releaseLeadershipIfHeld call.", e);
+ }
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/MigrationAdaptiveLeaderDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/MigrationAdaptiveLeaderDecider.java
new file mode 100644
index 000000000..5e5994199
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/MigrationAdaptiveLeaderDecider.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.leader;
+
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.LeaderDecider;
+import software.amazon.kinesis.metrics.MetricsFactory;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+
+import static java.util.Objects.nonNull;
+
+/**
+ * MigrationAdaptiveLeaderDecider that wraps around the actual LeaderDecider which can dynamically
+ * change based on the MigrationStateMachine.
+ */
+@Slf4j
+@KinesisClientInternalApi
+@ThreadSafe
+public class MigrationAdaptiveLeaderDecider implements LeaderDecider {
+
+ private final MetricsFactory metricsFactory;
+ private LeaderDecider currentLeaderDecider;
+
+ public MigrationAdaptiveLeaderDecider(final MetricsFactory metricsFactory) {
+ this.metricsFactory = metricsFactory;
+ }
+
+ @Override
+ public synchronized Boolean isLeader(final String workerId) {
+ if (currentLeaderDecider == null) {
+ throw new IllegalStateException("LeaderDecider uninitialized");
+ }
+
+ final MetricsScope scope =
+ MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
+ try {
+ publishSelectedLeaderDeciderMetrics(scope, currentLeaderDecider);
+ return currentLeaderDecider.isLeader(workerId);
+ } finally {
+ MetricsUtil.endScope(scope);
+ }
+ }
+
+ private static void publishSelectedLeaderDeciderMetrics(
+ final MetricsScope scope, final LeaderDecider leaderDecider) {
+ scope.addData(
+ leaderDecider.getClass().getSimpleName(), 1D, StandardUnit.COUNT, MetricsLevel.DETAILED);
+ }
+
+ public synchronized void updateLeaderDecider(final LeaderDecider leaderDecider) {
+ if (currentLeaderDecider != null) {
+ currentLeaderDecider.shutdown();
+ log.info(
+ "Updating leader decider dynamically from {} to {}",
+ this.currentLeaderDecider.getClass().getSimpleName(),
+ leaderDecider.getClass().getSimpleName());
+ } else {
+ log.info(
+ "Initializing dynamic leader decider with {}",
+ leaderDecider.getClass().getSimpleName());
+ }
+ currentLeaderDecider = leaderDecider;
+ currentLeaderDecider.initialize();
+ }
+
+ @Override
+ public synchronized void shutdown() { // same monitor as isLeader/updateLeaderDecider guards the delegate
+ if (nonNull(currentLeaderDecider)) {
+ log.info("Shutting down current {}", currentLeaderDecider.getClass().getSimpleName());
+ currentLeaderDecider.shutdown();
+ currentLeaderDecider = null;
+ } else {
+ log.info("LeaderDecider has already been shutdown");
+ }
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/DynamoUtils.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/DynamoUtils.java
index 34b13f646..6c27e1441 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/DynamoUtils.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/DynamoUtils.java
@@ -81,8 +81,20 @@ public static Long safeGetLong(Map dynamoRecord, String
}
}
+ public static AttributeValue createAttributeValue(Double doubleValue) {
+ if (doubleValue == null) {
+ throw new IllegalArgumentException("Double attributeValues cannot be null.");
+ }
+
+ return AttributeValue.builder().n(doubleValue.toString()).build();
+ }
+
public static String safeGetString(Map dynamoRecord, String key) {
AttributeValue av = dynamoRecord.get(key);
+ return safeGetString(av);
+ }
+
+ public static String safeGetString(AttributeValue av) {
if (av == null) {
return null;
} else {
@@ -99,4 +111,13 @@ public static List safeGetSS(Map dynamoRecord, S
return av.ss();
}
}
+
+ public static Double safeGetDouble(Map<String, AttributeValue> dynamoRecord, String key) {
+ AttributeValue av = dynamoRecord.get(key);
+ if (av == null) {
+ return null;
+ } else {
+ return Double.valueOf(av.n()); // parse the attribute's numeric (N) string form
+ }
+ }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/KinesisShardDetector.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/KinesisShardDetector.java
index d128fc950..97ed48d47 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/KinesisShardDetector.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/KinesisShardDetector.java
@@ -103,26 +103,6 @@ public class KinesisShardDetector implements ShardDetector {
private static final Boolean THROW_RESOURCE_NOT_FOUND_EXCEPTION = true;
- @Deprecated
- public KinesisShardDetector(
- KinesisAsyncClient kinesisClient,
- String streamName,
- long listShardsBackoffTimeInMillis,
- int maxListShardsRetryAttempts,
- long listShardsCacheAllowedAgeInSeconds,
- int maxCacheMissesBeforeReload,
- int cacheMissWarningModulus) {
- this(
- kinesisClient,
- StreamIdentifier.singleStreamInstance(streamName),
- listShardsBackoffTimeInMillis,
- maxListShardsRetryAttempts,
- listShardsCacheAllowedAgeInSeconds,
- maxCacheMissesBeforeReload,
- cacheMissWarningModulus,
- LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT);
- }
-
public KinesisShardDetector(
KinesisAsyncClient kinesisClient,
StreamIdentifier streamIdentifier,
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/Lease.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/Lease.java
index 01735f9c8..9d44a7554 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/Lease.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/Lease.java
@@ -46,7 +46,11 @@
"lastCounterIncrementNanos",
"childShardIds",
"pendingCheckpointState",
- "isMarkedForLeaseSteal"
+ "isMarkedForLeaseSteal",
+ "throughputKBps",
+ "checkpointOwner",
+ "checkpointOwnerTimeoutTimestampMillis",
+ "isExpiredOrUnassigned"
})
@ToString
public class Lease {
@@ -104,6 +108,33 @@ public class Lease {
@Setter
private boolean isMarkedForLeaseSteal;
+ /**
+ * If true, this indicates that lease is ready to be immediately reassigned.
+ */
+ @Setter
+ private boolean isExpiredOrUnassigned;
+
+ /**
+ * Throughput in KBps for the lease.
+ */
+ private Double throughputKBps;
+
+ /**
+ * Owner of the checkpoint. The attribute is used for graceful shutdowns to indicate the owner that
+ * is allowed to write the checkpoint.
+ */
+ @Setter
+ private String checkpointOwner;
+
+ /**
+ * This field is used for tracking when the shutdown was requested on the lease so we can expire it. This is
+ * deliberately not persisted in DynamoDB because leaseOwner are expected to transfer lease from itself to the
+ * next owner during shutdown. If the worker dies before shutdown the lease will just become expired then we can
+ * pick it up. If for some reason worker is not able to shut down and continues holding onto the lease
+ * this timeout will kick in and force a lease transfer.
+ */
+ @Setter
+ private Long checkpointOwnerTimeoutTimestampMillis;
/**
* Count of distinct lease holders between checkpoints.
*/
@@ -242,6 +273,54 @@ public boolean isExpired(long leaseDurationNanos, long asOfNanos) {
}
}
+ /**
+ * @return true if checkpoint owner is set. Indicating a requested shutdown.
+ */
+ public boolean shutdownRequested() {
+ return checkpointOwner != null;
+ }
+
+ /**
+ * Check whether lease should be blocked on pending checkpoint. We DON'T block if
+ * - lease is expired (Expired lease should be assigned right away) OR
+ * ----- at this point we know lease is assigned -----
+ * - lease is shardEnd (No more processing possible) OR
+ * - lease is NOT requested for shutdown OR
+ * - lease shutdown expired
+ *
+ * @param currentTimeMillis current time in milliseconds
+ * @return true if lease is blocked on pending checkpoint
+ */
+ public boolean blockedOnPendingCheckpoint(long currentTimeMillis) {
+ // using ORs and negate
+ return !(isExpiredOrUnassigned
+ || ExtendedSequenceNumber.SHARD_END.equals(checkpoint)
+ || !shutdownRequested()
+ // reached only when shutdown is requested; the timeout must be set then, as a null would fail to unbox
+ || currentTimeMillis - checkpointOwnerTimeoutTimestampMillis >= 0);
+ }
+
+ /**
+ * Check whether lease is eligible for graceful shutdown. It's eligible if
+ * - lease is still assigned (not expired) AND
+ * - lease is NOT shardEnd (No more processing possible) AND
+ * - lease is NOT requested for shutdown
+ *
+ * @return true if lease is eligible for graceful shutdown
+ */
+ public boolean isEligibleForGracefulShutdown() {
+ return !isExpiredOrUnassigned && !ExtendedSequenceNumber.SHARD_END.equals(checkpoint) && !shutdownRequested();
+ }
+
+ /**
+ * Need to handle the case during graceful shutdown where leaseOwner isn't the current owner
+ *
+ * @return the actual owner
+ */
+ public String actualOwner() {
+ return checkpointOwner == null ? leaseOwner : checkpointOwner;
+ }
+
/**
* @return true if lease is not currently owned
*/
@@ -343,6 +422,15 @@ public void childShardIds(@NonNull final Collection childShardIds) {
this.childShardIds.addAll(childShardIds);
}
+ /**
+ * Sets throughputKBps.
+ *
+ * @param throughputKBps throughput value to record for this lease
+ */
+ public void throughputKBps(double throughputKBps) {
+ this.throughputKBps = throughputKBps;
+ }
+
/**
* Set the hash range key for this shard.
* @param hashKeyRangeForLease
@@ -370,6 +458,8 @@ public void leaseOwner(String leaseOwner) {
* @return A deep copy of this object.
*/
public Lease copy() {
- return new Lease(this);
+ final Lease lease = new Lease(this);
+ lease.checkpointOwner(this.checkpointOwner);
+ return lease;
}
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseCoordinator.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseCoordinator.java
index acc08dabc..4a42b2c4a 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseCoordinator.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseCoordinator.java
@@ -20,6 +20,7 @@
import java.util.List;
import java.util.UUID;
+import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider;
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
@@ -38,11 +39,14 @@ public interface LeaseCoordinator {
/**
* Start background LeaseHolder and LeaseTaker threads.
+ * @param leaseAssignmentModeProvider provider of Lease Assignment mode to determine whether to start components
+ * for both V2 and V3 functionality or only V3 functionality
* @throws ProvisionedThroughputException If we can't talk to DynamoDB due to insufficient capacity.
* @throws InvalidStateException If the lease table doesn't exist
* @throws DependencyException If we encountered exception taking to DynamoDB
*/
- void start() throws DependencyException, InvalidStateException, ProvisionedThroughputException;
+ void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider)
+ throws DependencyException, InvalidStateException, ProvisionedThroughputException;
/**
* Runs a single iteration of the lease taker - used by integration tests.
@@ -152,4 +156,9 @@ default List allLeases() {
* @return LeaseCoordinator
*/
DynamoDBLeaseCoordinator initialLeaseTableReadCapacity(long readCapacity);
+
+ /**
+ * @return instance of {@link LeaseStatsRecorder}
+ */
+ LeaseStatsRecorder leaseStatsRecorder();
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseDiscoverer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseDiscoverer.java
new file mode 100644
index 000000000..9f6ce776e
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseDiscoverer.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.leases;
+
+import java.util.List;
+
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
+
+public interface LeaseDiscoverer {
+ /**
+ * Identifies the leases that are assigned to the current worker but are not being tracked and processed by the
+ * current worker.
+ *
+ * @return list of leases assigned to the current worker that it is not yet tracking
+ * @throws DependencyException if DynamoDB scan fails in an unexpected way
+ * @throws InvalidStateException if lease table does not exist
+ * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
+ */
+ List<Lease> discoverNewLeases() throws ProvisionedThroughputException, InvalidStateException, DependencyException;
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementConfig.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementConfig.java
index 2d4e041c0..ef750f461 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementConfig.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementConfig.java
@@ -16,7 +16,9 @@
package software.amazon.kinesis.leases;
import java.time.Duration;
+import java.util.ArrayList;
import java.util.Collection;
+import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
@@ -25,6 +27,7 @@
import java.util.function.Function;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import lombok.Builder;
import lombok.Data;
import lombok.NonNull;
import lombok.experimental.Accessors;
@@ -34,14 +37,17 @@
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
import software.amazon.awssdk.services.dynamodb.model.Tag;
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
+import software.amazon.kinesis.common.DdbTableConfig;
import software.amazon.kinesis.common.InitialPositionInStream;
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
import software.amazon.kinesis.common.LeaseCleanupConfig;
import software.amazon.kinesis.common.StreamConfig;
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseManagementFactory;
+import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer;
import software.amazon.kinesis.leases.dynamodb.TableCreatorCallback;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.NullMetricsFactory;
+import software.amazon.kinesis.worker.metric.WorkerMetric;
/**
* Used by the KCL to configure lease management.
@@ -209,6 +215,9 @@ public class LeaseManagementConfig {
private BillingMode billingMode = BillingMode.PAY_PER_REQUEST;
+ private WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig =
+ new WorkerUtilizationAwareAssignmentConfig();
+
/**
* Whether to enable deletion protection on the DynamoDB lease table created by KCL. This does not update
* already existing tables.
@@ -276,14 +285,17 @@ public LeaseManagementConfig(
}
public LeaseManagementConfig(
- String tableName,
- DynamoDbAsyncClient dynamoDBClient,
- KinesisAsyncClient kinesisClient,
- String workerIdentifier) {
+ final String tableName,
+ final String applicationName,
+ final DynamoDbAsyncClient dynamoDBClient,
+ final KinesisAsyncClient kinesisClient,
+ final String workerIdentifier) {
this.tableName = tableName;
this.dynamoDBClient = dynamoDBClient;
this.kinesisClient = kinesisClient;
this.workerIdentifier = workerIdentifier;
+ this.workerUtilizationAwareAssignmentConfig.workerMetricsTableConfig =
+ new WorkerMetricsTableConfig(applicationName);
}
/**
@@ -350,10 +362,18 @@ static class LeaseManagementThreadPool extends ThreadPoolExecutor {
*/
private TableCreatorCallback tableCreatorCallback = TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK;
+ /**
+ * @deprecated never used and will be removed in future releases
+ */
+ @Deprecated
private HierarchicalShardSyncer hierarchicalShardSyncer;
private LeaseManagementFactory leaseManagementFactory;
+ /**
+ * @deprecated never used and will be removed in future releases
+ */
+ @Deprecated
public HierarchicalShardSyncer hierarchicalShardSyncer() {
if (hierarchicalShardSyncer == null) {
hierarchicalShardSyncer = new HierarchicalShardSyncer();
@@ -361,39 +381,63 @@ public HierarchicalShardSyncer hierarchicalShardSyncer() {
return hierarchicalShardSyncer;
}
+ /**
+ * Configuration class for controlling the graceful handoff of leases.
+ * This configuration allows tuning of the shutdown behavior during lease transfers.
+ *
+ * It provides settings to control the timeout period for waiting on the record processor
+ * to shut down and an option to enable or disable graceful lease handoff.
+ *
+ */
+ @Data
+ @Builder
+ @Accessors(fluent = true)
+ public static class GracefulLeaseHandoffConfig {
+ /**
+ * The minimum amount of time (in milliseconds) to wait for the current shard's RecordProcessor
+ * to gracefully shut down before forcefully transferring the lease to the next owner.
+ *
+ * If each call to {@code processRecords} is expected to run longer than the default value,
+ * it makes sense to set this to a higher value to ensure the RecordProcessor has enough
+ * time to complete its processing.
+ *
+ *
+ * Default value is 30,000 milliseconds (30 seconds).
+ *
+ */
+ @Builder.Default
+ private long gracefulLeaseHandoffTimeoutMillis = 30_000L;
+ /**
+ * Flag to enable or disable the graceful lease handoff mechanism.
+ *
+ * When set to {@code true}, the KCL will attempt to gracefully transfer leases by
+ * allowing the shard's RecordProcessor sufficient time to complete processing before
+ * handing off the lease to another worker. When {@code false}, the lease will be
+ * handed off without waiting for the RecordProcessor to shut down gracefully. Note
+ * that checkpointing is expected to be implemented inside {@code shutdownRequested}
+ * for this feature to work end to end.
+ *
+ *
+ * Default value is {@code true}.
+ *
+ */
+ @Builder.Default
+ private boolean isGracefulLeaseHandoffEnabled = true;
+ }
+
+ private GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig =
+ GracefulLeaseHandoffConfig.builder().build();
+
+ /**
+ * @deprecated This method is no longer invoked; {@code leaseManagementFactory(LeaseSerializer, boolean)}
+ * is invoked instead. Please remove any implementation of this method, as a future
+ * release will remove this API.
+ */
@Deprecated
public LeaseManagementFactory leaseManagementFactory() {
if (leaseManagementFactory == null) {
Validate.notEmpty(streamName(), "Stream name is empty");
- leaseManagementFactory = new DynamoDBLeaseManagementFactory(
- kinesisClient(),
- streamName(),
- dynamoDBClient(),
- tableName(),
- workerIdentifier(),
- executorService(),
- initialPositionInStream(),
- failoverTimeMillis(),
- epsilonMillis(),
- maxLeasesForWorker(),
- maxLeasesToStealAtOneTime(),
- maxLeaseRenewalThreads(),
- cleanupLeasesUponShardCompletion(),
- ignoreUnexpectedChildShards(),
- shardSyncIntervalMillis(),
- consistentReads(),
- listShardsBackoffTimeInMillis(),
- maxListShardsRetryAttempts(),
- maxCacheMissesBeforeReload(),
- listShardsCacheAllowedAgeInSeconds(),
- cacheMissWarningModulus(),
- initialLeaseTableReadCapacity(),
- initialLeaseTableWriteCapacity(),
- hierarchicalShardSyncer(),
- tableCreatorCallback(),
- dynamoDbRequestTimeout(),
- billingMode(),
- tags());
+ leaseManagementFactory(new DynamoDBLeaseSerializer(), false); // return value ignored: presumably the overload assigns the leaseManagementFactory field (assignment not visible in this hunk) - confirm
}
return leaseManagementFactory;
}
@@ -430,7 +474,6 @@ public LeaseManagementFactory leaseManagementFactory(
cacheMissWarningModulus(),
initialLeaseTableReadCapacity(),
initialLeaseTableWriteCapacity(),
- hierarchicalShardSyncer(),
tableCreatorCallback(),
dynamoDbRequestTimeout(),
billingMode(),
@@ -440,7 +483,9 @@ public LeaseManagementFactory leaseManagementFactory(
leaseSerializer,
customShardDetectorProvider(),
isMultiStreamingMode,
- leaseCleanupConfig());
+ leaseCleanupConfig(),
+ workerUtilizationAwareAssignmentConfig(),
+ gracefulLeaseHandoffConfig);
}
return leaseManagementFactory;
}
@@ -454,4 +499,90 @@ public LeaseManagementConfig leaseManagementFactory(final LeaseManagementFactory
this.leaseManagementFactory = leaseManagementFactory;
return this;
}
+
+ @Data
+ @Accessors(fluent = true)
+ public static class WorkerUtilizationAwareAssignmentConfig {
+ /**
+ * Frequency, in milliseconds, of capturing worker metric stats in memory. Default is 1s.
+ */
+ private long inMemoryWorkerMetricsCaptureFrequencyMillis =
+ Duration.ofSeconds(1L).toMillis();
+ /**
+ * Frequency, in milliseconds, of reporting worker metric stats to storage. Default is 30s.
+ */
+ private long workerMetricsReporterFreqInMillis = Duration.ofSeconds(30).toMillis();
+ /**
+ * Number of metrics persisted per worker metric in the WorkerMetricStats DDB table. Default is 10.
+ */
+ private int noOfPersistedMetricsPerWorkerMetrics = 10;
+ /**
+ * Option to disable the use of workerMetrics in lease balancing. Default is false.
+ */
+ private boolean disableWorkerMetrics = false;
+ /**
+ * List of workerMetrics for the application. Default is an empty list.
+ */
+ private List workerMetricList = new ArrayList<>();
+ /**
+ * Max throughput per host in KBps. Default is Double.MAX_VALUE, i.e. effectively unlimited.
+ */
+ private double maxThroughputPerHostKBps = Double.MAX_VALUE;
+ /**
+ * Percentage used to achieve critical dampening; default is 60. NOTE(review): presumably applied during re-balancing - confirm.
+ */
+ private int dampeningPercentage = 60;
+ /**
+ * Percentage value used to trigger reBalance. If the fleet has workers whose metric values are more than this
+ * percentage (10% by default) above or below the fleet level average, then reBalance is triggered.
+ * Leases are taken from workers with metrics value more than fleet level average. The load to take from these
+ * workers is determined by evaluating how far they are with respect to fleet level average.
+ */
+ private int reBalanceThresholdPercentage = 10;
+
+ /**
+ * The allowThroughputOvershoot flag determines whether leases should still be taken even if
+ * it causes the total assigned throughput to exceed the desired throughput to take for re-balance.
+ * Enabling this flag provides more flexibility for the LeaseAssignmentManager to explore additional
+ * assignment possibilities, which can lead to faster throughput convergence. Default is true.
+ */
+ private boolean allowThroughputOvershoot = true;
+
+ /**
+ * Duration after which workerMetricStats entry from WorkerMetricStats table will be cleaned up. When an entry's
+ * lastUpdateTime is older than staleWorkerMetricsEntryCleanupDuration from current time, entry will be removed
+ * from the table. Default is 1 day.
+ */
+ private Duration staleWorkerMetricsEntryCleanupDuration = Duration.ofDays(1);
+
+ /**
+ * Configuration for how the WorkerMetricStats table is created, such as table name,
+ * billing mode, provisioned capacity. If no table name is specified, the table name will
+ * default to applicationName-WorkerMetricStats. If no billing mode is chosen, default is
+ * On-Demand.
+ */
+ private WorkerMetricsTableConfig workerMetricsTableConfig;
+
+ /**
+ * Frequency to perform worker variance balancing. This value is relative to the LeaseAssignmentManager (LAM)
+ * frequency, that is, by default every third iteration of LAM the worker variance balancing will be performed.
+ * Setting it to 1 will make varianceBalancing run on every iteration of LAM and 2 on every 2nd iteration
+ * and so on.
+ * NOTE: LAM frequency = failoverTimeMillis
+ */
+ private int varianceBalancingFrequency = 3;
+
+ /**
+ * Alpha value used for calculating exponential moving average of worker's metricStats. Selecting a
+ * higher alpha value gives more weight to recent values and thus a low smoothing effect on the computed average,
+ * and selecting a smaller alpha value gives more weight to past values and a high smoothing effect. Default is 0.5.
+ */
+ private double workerMetricsEMAAlpha = 0.5;
+ }
+
+ public static class WorkerMetricsTableConfig extends DdbTableConfig {
+ public WorkerMetricsTableConfig(final String applicationName) {
+ super(applicationName, "WorkerMetricStats"); // presumably yields the default table name applicationName-WorkerMetricStats - confirm in DdbTableConfig
+ }
+ }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementFactory.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementFactory.java
index 9ed77a537..788034d1e 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementFactory.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementFactory.java
@@ -15,9 +15,12 @@
package software.amazon.kinesis.leases;
+import java.util.concurrent.ConcurrentMap;
+
import software.amazon.kinesis.common.StreamConfig;
import software.amazon.kinesis.coordinator.DeletedStreamListProvider;
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher;
+import software.amazon.kinesis.lifecycle.ShardConsumer;
import software.amazon.kinesis.metrics.MetricsFactory;
/**
@@ -26,10 +29,27 @@
public interface LeaseManagementFactory {
LeaseCoordinator createLeaseCoordinator(MetricsFactory metricsFactory);
- ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory);
+ default LeaseCoordinator createLeaseCoordinator(
+ MetricsFactory metricsFactory, ConcurrentMap shardInfoShardConsumerMap) {
+ throw new UnsupportedOperationException("Not implemented"); // default body only; implementations must override to support this overload
+ }
+ /**
+ * @deprecated This method is never invoked. Please remove any implementation of this method,
+ * as it will be removed in a future release.
+ */
+ @Deprecated
+ default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory) {
+ throw new UnsupportedOperationException("Deprecated");
+ }
+
+ /**
+ * @deprecated This method is never invoked. Please remove any implementation of this method,
+ * as it will be removed in a future release.
+ */
+ @Deprecated
default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory, StreamConfig streamConfig) {
- throw new UnsupportedOperationException();
+ throw new UnsupportedOperationException("Deprecated");
}
default ShardSyncTaskManager createShardSyncTaskManager(
@@ -41,10 +61,17 @@ default ShardSyncTaskManager createShardSyncTaskManager(
DynamoDBLeaseRefresher createLeaseRefresher();
- ShardDetector createShardDetector();
+ /**
+ * @deprecated This method is never invoked. Please remove any implementation of this method,
+ * as it will be removed in a future release.
+ */
+ @Deprecated
+ default ShardDetector createShardDetector() {
+ throw new UnsupportedOperationException("Deprecated");
+ }
default ShardDetector createShardDetector(StreamConfig streamConfig) {
- throw new UnsupportedOperationException();
+ throw new UnsupportedOperationException("Not implemented");
}
LeaseCleanupManager createLeaseCleanupManager(MetricsFactory metricsFactory);
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseRefresher.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseRefresher.java
index c38d442a8..fc71621d1 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseRefresher.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseRefresher.java
@@ -15,6 +15,9 @@
package software.amazon.kinesis.leases;
import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.stream.Collectors;
import software.amazon.kinesis.common.StreamIdentifier;
import software.amazon.kinesis.leases.exceptions.DependencyException;
@@ -75,6 +78,37 @@ boolean createLeaseTableIfNotExists(Long readCapacity, Long writeCapacity)
*/
boolean waitUntilLeaseTableExists(long secondsBetweenPolls, long timeoutSeconds) throws DependencyException;
+ /**
+ * Creates the LeaseOwnerToLeaseKey index on the lease table if it doesn't exist and returns the status of the index.
+ *
+ * @return status of the index; this default implementation does nothing and returns {@code null}
+ * @throws DependencyException if storage's describe API fails in an unexpected way
+ */
+ default String createLeaseOwnerToLeaseKeyIndexIfNotExists() throws DependencyException {
+ return null;
+ }
+
+ /**
+ * Blocks until the index exists by polling storage until either the index is ACTIVE or the timeout has
+ * elapsed. This default implementation does not poll and always returns {@code false}.
+ *
+ * @param secondsBetweenPolls time to wait between polls in seconds
+ * @param timeoutSeconds total time to wait in seconds
+ *
+ * @return true if index on the table exists and is ACTIVE, false if timeout was reached
+ */
+ default boolean waitUntilLeaseOwnerToLeaseKeyIndexExists(
+ final long secondsBetweenPolls, final long timeoutSeconds) {
+ return false;
+ }
+
+ /**
+ * Check if leaseOwner GSI is ACTIVE
+ * @return true if index is active, false otherwise
+ * @throws DependencyException if storage's describe API fails in an unexpected way
+ */
+ boolean isLeaseOwnerToLeaseKeyIndexActive() throws DependencyException;
+
/**
* List all leases for a given stream synchronously.
*
@@ -87,6 +121,24 @@ boolean createLeaseTableIfNotExists(Long readCapacity, Long writeCapacity)
List listLeasesForStream(StreamIdentifier streamIdentifier)
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
+ /**
+ * List the keys of all leases owned by the given workerIdentifier synchronously.
+ * Default implementation calls listLeases() and filters the results; leases without an owner are skipped.
+ *
+ * @param workerIdentifier identifier of the worker whose lease keys are returned
+ * @return list of lease keys owned by the worker
+ * @throws DependencyException if DynamoDB scan fails in an unexpected way
+ * @throws InvalidStateException if lease table does not exist
+ * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
+ */
+ default List listLeaseKeysForWorker(final String workerIdentifier)
+ throws DependencyException, InvalidStateException, ProvisionedThroughputException {
+ return listLeases().stream()
+ .filter(lease -> lease.leaseOwner() != null && lease.leaseOwner().equals(workerIdentifier))
+ .map(Lease::leaseKey)
+ .collect(Collectors.toList());
+ }
+
/**
* List all objects in table synchronously.
*
@@ -98,6 +150,23 @@ List listLeasesForStream(StreamIdentifier streamIdentifier)
*/
List listLeases() throws DependencyException, InvalidStateException, ProvisionedThroughputException;
+ /**
+ * List all leases from the storage in parallel and deserialize them into Lease objects. Returns the list of leaseKey
+ * that failed to deserialize separately. This default implementation throws UnsupportedOperationException.
+ *
+ * @param threadPool threadpool to use for parallel scan
+ * @param parallelismFactor no. of parallel scans
+ * @return Pair of List of leases from the storage and List of items failed to deserialize
+ * @throws DependencyException if DynamoDB scan fails in an unexpected way
+ * @throws InvalidStateException if lease table does not exist
+ * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
+ */
+ default Map.Entry, List> listLeasesParallely(
+ final ExecutorService threadPool, final int parallelismFactor)
+ throws DependencyException, InvalidStateException, ProvisionedThroughputException {
+ throw new UnsupportedOperationException("listLeasesParallely is not implemented");
+ }
+
/**
* Create a new lease. Conditional on a lease not already existing with this shardId.
*
@@ -154,6 +223,47 @@ boolean createLeaseIfNotExists(Lease lease)
boolean takeLease(Lease lease, String owner)
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
+ /**
+ * Assigns the given lease to newOwner by incrementing its leaseCounter and setting its owner field. Conditional
+ * on the leaseOwner in DynamoDB matching the leaseOwner of the input lease. Mutates the leaseCounter and owner of
+ * the passed-in lease object after updating DynamoDB. This default implementation is unsupported and throws.
+ *
+ * @param lease the lease to be assigned
+ * @param newOwner the new owner
+ *
+ * @return true if lease was successfully assigned, false otherwise
+ *
+ * @throws InvalidStateException if lease table does not exist
+ * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
+ * @throws DependencyException if DynamoDB update fails in an unexpected way
+ */
+ default boolean assignLease(final Lease lease, final String newOwner)
+ throws DependencyException, InvalidStateException, ProvisionedThroughputException {
+
+ throw new UnsupportedOperationException("assignLease is not implemented");
+ }
+
+ /**
+ * Initiates a graceful handoff of the given lease to the specified new owner, allowing the current owner
+ * to complete its processing before transferring ownership.
+ * <p>
+ * This method updates the lease with the new owner information but ensures that the current owner
+ * is given time to gracefully finish its work (e.g., processing records) before the lease is reassigned.
+ * <p>
+ * The default implementation is unsupported and throws {@link UnsupportedOperationException}.
+ * @param lease the lease to be assigned
+ * @param newOwner the new owner
+ * @return true if a graceful handoff was successfully initiated
+ * @throws InvalidStateException if lease table does not exist
+ * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
+ * @throws DependencyException if DynamoDB update fails in an unexpected way
+ */
+ default boolean initiateGracefulLeaseHandoff(final Lease lease, final String newOwner)
+ throws DependencyException, InvalidStateException, ProvisionedThroughputException {
+
+ throw new UnsupportedOperationException("initiateGracefulLeaseHandoff is not implemented");
+ }
+
/**
* Evict the current owner of lease by setting owner to null. Conditional on the owner in DynamoDB matching the owner of
* the input. Mutates the lease counter and owner of the passed-in lease object after updating the record in DynamoDB.
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseSerializer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseSerializer.java
index 5d7bea63d..3c4692a92 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseSerializer.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseSerializer.java
@@ -15,6 +15,7 @@
package software.amazon.kinesis.leases;
import java.util.Collection;
+import java.util.Collections;
import java.util.Map;
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
@@ -100,6 +101,15 @@ default Map getDynamoExistentExpectation(String
*/
Map getDynamoTakeLeaseUpdate(Lease lease, String newOwner);
+ /**
+ * @param lease lease that needs to be assigned
+ * @param newOwner the new lease owner
+ * @return the attribute value map that assigns a lease to a new owner; this default implementation throws instead
+ */
+ default Map getDynamoAssignLeaseUpdate(Lease lease, String newOwner) {
+ throw new UnsupportedOperationException("getDynamoAssignLeaseUpdate is not implemented");
+ }
+
/**
* @param lease
* @return the attribute value map that voids a lease
@@ -127,8 +137,22 @@ default Map getDynamoUpdateLeaseUpdate(Lease lease
*/
Collection getKeySchema();
+ default Collection getWorkerIdToLeaseKeyIndexKeySchema() {
+ return Collections.emptyList(); // default: empty, type-safe collection (no key schema for the index)
+ }
+
+ default Collection getWorkerIdToLeaseKeyIndexAttributeDefinitions() {
+ return Collections.emptyList(); // default: empty, type-safe collection (no attribute definitions for the index)
+ }
+
/**
* @return attribute definitions for creating a DynamoDB table to store leases
*/
Collection getAttributeDefinitions();
+
+ /**
+ * @param lease lease to build the update for
+ * @return the attribute value map that includes lease throughput
+ */
+ Map getDynamoLeaseThroughputKbpsUpdate(Lease lease);
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseStatsRecorder.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseStatsRecorder.java
new file mode 100644
index 000000000..6115bf9ab
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseStatsRecorder.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.leases;
+
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Queue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+import lombok.Builder;
+import lombok.Getter;
+import lombok.NonNull;
+import lombok.RequiredArgsConstructor;
+import lombok.ToString;
+import software.amazon.awssdk.annotations.ThreadSafe;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.utils.ExponentialMovingAverage;
+
+import static java.util.Objects.isNull;
+
+/**
+ * This class records the stats for the leases.
+ * The stats are recorded in a thread safe queue, and the throughput is calculated by summing up the bytes and dividing
+ * by the interval in seconds.
+ * This class is thread safe and backed by thread safe data structures.
+ */
+@RequiredArgsConstructor
+@KinesisClientInternalApi
+@ThreadSafe
+public class LeaseStatsRecorder {
+
+ /**
+ * This default alpha is chosen based on the testing so far between simple average and moving average with 0.5.
+ * In the future, if one value does not fit all use cases, inject this via config.
+ */
+ private static final double DEFAULT_ALPHA = 0.5;
+
+ public static final int BYTES_PER_KB = 1024;
+
+ private final Long renewerFrequencyInMillis;
+ private final Map> leaseStatsMap = new ConcurrentHashMap<>();
+ private final Map leaseKeyToExponentialMovingAverageMap =
+ new ConcurrentHashMap<>();
+ private final Callable timeProviderInMillis;
+
+ /**
+ * Appends the given stats to the queue kept for their lease key. Provides happens-before semantics (i.e., the
+ * action (access or removal) from a thread happens before the action from a subsequent thread) when multithreaded.
+ */
+ public void recordStats(@NonNull final LeaseStats leaseStats) {
+ final Queue leaseStatsQueue =
+ leaseStatsMap.computeIfAbsent(leaseStats.getLeaseKey(), lease -> new ConcurrentLinkedQueue<>());
+ leaseStatsQueue.add(leaseStats);
+ }
+
+ /**
+ * Calculates the throughput in KBps for the given leaseKey.
+ * Method first clears the items that are older than {@link #renewerFrequencyInMillis} from the queue and then
+ * calculates the throughput per second during {@link #renewerFrequencyInMillis} interval and then returns the
+ * ExponentialMovingAverage of the throughput. If method is called in quick succession with or without new stats
+ * the result can be different as ExponentialMovingAverage decays old values on every new call.
+ * This method is thread safe.
+ * @param leaseKey leaseKey for which stats are required
+ * @return throughput in KBps, or null if no stats have been recorded for the leaseKey.
+ */
+ public Double getThroughputKBps(final String leaseKey) {
+ final Queue leaseStatsQueue = leaseStatsMap.get(leaseKey);
+
+ if (isNull(leaseStatsQueue)) {
+ // This means there is no entry for this leaseKey yet
+ return null;
+ }
+
+ filterExpiredEntries(leaseStatsQueue);
+
+ // Look up (or lazily create) the moving average that accumulates this lease's throughput samples.
+ final ExponentialMovingAverage exponentialMovingAverage = leaseKeyToExponentialMovingAverageMap.computeIfAbsent(
+ leaseKey, leaseId -> new ExponentialMovingAverage(DEFAULT_ALPHA));
+
+ // Bytes are converted to KB and divided by the interval in seconds. The interval is divided by 1000.0 rather
+ // than converted via the Duration class, because Duration rounds off to seconds and precision is lost.
+ final double frequency = renewerFrequencyInMillis / 1000.0;
+ final double throughput = readQueue(leaseStatsQueue).stream()
+ .mapToDouble(LeaseStats::getBytes)
+ .sum()
+ / BYTES_PER_KB
+ / frequency;
+ exponentialMovingAverage.add(throughput);
+ return exponentialMovingAverage.getValue();
+ }
+
+ /**
+ * Gets the currentTimeMillis and then iterates over the queue, collecting the stats whose creation time is not
+ * later than currentTimeMillis and stopping at the first entry created after it.
+ * This is specifically done to avoid a potential race in which a high-frequency put thread blocks the get thread.
+ */
+ private Queue readQueue(final Queue leaseStatsQueue) {
+ final long currentTimeMillis = getCurrenTimeInMillis();
+ final Queue response = new LinkedList<>();
+ for (LeaseStats leaseStats : leaseStatsQueue) {
+ if (leaseStats.creationTimeMillis > currentTimeMillis) {
+ break;
+ }
+ response.add(leaseStats);
+ }
+ return response;
+ }
+
+ private long getCurrenTimeInMillis() {
+ try {
+ return timeProviderInMillis.call();
+ } catch (final Exception e) {
+ // Deliberate best-effort: fall back to System.currentTimeMillis if the injected time provider fails.
+ return System.currentTimeMillis();
+ }
+ }
+
+ private void filterExpiredEntries(final Queue leaseStatsQueue) {
+ final long currentTime = getCurrenTimeInMillis();
+ while (!leaseStatsQueue.isEmpty()) {
+ final LeaseStats leaseStats = leaseStatsQueue.peek();
+ if (isNull(leaseStats) || currentTime - leaseStats.getCreationTimeMillis() < renewerFrequencyInMillis) {
+ break;
+ }
+ leaseStatsQueue.poll();
+ }
+ }
+
+ /**
+ * Clear the in-memory stats for the lease when a lease is reassigned (due to shut down or lease stealing)
+ * @param leaseKey leaseKey for which the stats are to be cleared.
+ */
+ public void dropLeaseStats(final String leaseKey) {
+ leaseStatsMap.remove(leaseKey);
+ leaseKeyToExponentialMovingAverageMap.remove(leaseKey);
+ }
+
+ @Builder
+ @Getter
+ @ToString
+ @KinesisClientInternalApi
+ public static final class LeaseStats {
+ /**
+ * Lease key for which this leaseStats object is created.
+ */
+ private final String leaseKey;
+ /**
+ * Bytes that are processed for a lease
+ */
+ private final long bytes;
+ /**
+ * Wall time in epoch millis at which this leaseStats object was created. This time is used to determine the
+ * expiry of the lease stats.
+ */
+ @Builder.Default
+ private final long creationTimeMillis = System.currentTimeMillis();
+ }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/ShardSyncTaskManager.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/ShardSyncTaskManager.java
index add8cf4f6..9b63883bd 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/ShardSyncTaskManager.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/ShardSyncTaskManager.java
@@ -71,7 +71,7 @@ public class ShardSyncTaskManager {
/**
* Constructor.
*
- * NOTE: This constructor is deprecated and will be removed in a future release.
+ * @deprecated This constructor is deprecated and will be removed in a future release.
*
* @param shardDetector
* @param leaseRefresher
@@ -92,18 +92,16 @@ public ShardSyncTaskManager(
long shardSyncIdleTimeMillis,
ExecutorService executorService,
MetricsFactory metricsFactory) {
- this.shardDetector = shardDetector;
- this.leaseRefresher = leaseRefresher;
- this.initialPositionInStream = initialPositionInStream;
- this.cleanupLeasesUponShardCompletion = cleanupLeasesUponShardCompletion;
- this.garbageCollectLeases = true;
- this.ignoreUnexpectedChildShards = ignoreUnexpectedChildShards;
- this.shardSyncIdleTimeMillis = shardSyncIdleTimeMillis;
- this.executorService = executorService;
- this.hierarchicalShardSyncer = new HierarchicalShardSyncer();
- this.metricsFactory = metricsFactory;
- this.shardSyncRequestPending = new AtomicBoolean(false);
- this.lock = new ReentrantLock();
+ this(
+ shardDetector,
+ leaseRefresher,
+ initialPositionInStream,
+ cleanupLeasesUponShardCompletion,
+ ignoreUnexpectedChildShards,
+ shardSyncIdleTimeMillis,
+ executorService,
+ new HierarchicalShardSyncer(),
+ metricsFactory);
}
/**
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinator.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinator.java
index bef76ef05..7eb4c4f1a 100644
--- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinator.java
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinator.java
@@ -19,6 +19,7 @@
import java.util.List;
import java.util.Map;
import java.util.UUID;
+import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedTransferQueue;
@@ -30,13 +31,17 @@
import java.util.stream.Collectors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseCoordinator;
+import software.amazon.kinesis.leases.LeaseDiscoverer;
import software.amazon.kinesis.leases.LeaseManagementConfig;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.LeaseRenewer;
+import software.amazon.kinesis.leases.LeaseStatsRecorder;
import software.amazon.kinesis.leases.LeaseTaker;
import software.amazon.kinesis.leases.MultiStreamLease;
import software.amazon.kinesis.leases.ShardInfo;
@@ -44,6 +49,8 @@
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.LeasingException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
+import software.amazon.kinesis.lifecycle.LeaseGracefulShutdownHandler;
+import software.amazon.kinesis.lifecycle.ShardConsumer;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
@@ -70,115 +77,34 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
.setNameFormat("LeaseRenewer-%04d")
.setDaemon(true)
.build();
+ private static final ThreadFactory LEASE_DISCOVERY_THREAD_FACTORY = new ThreadFactoryBuilder()
+ .setNameFormat("LeaseDiscovery-%04d")
+ .setDaemon(true)
+ .build();
private final LeaseRenewer leaseRenewer;
private final LeaseTaker leaseTaker;
+ private final LeaseDiscoverer leaseDiscoverer;
private final long renewerIntervalMillis;
private final long takerIntervalMillis;
+ private final long leaseDiscovererIntervalMillis;
private final ExecutorService leaseRenewalThreadpool;
+ private final ExecutorService leaseDiscoveryThreadPool;
private final LeaseRefresher leaseRefresher;
+ private final LeaseStatsRecorder leaseStatsRecorder;
+ private final LeaseGracefulShutdownHandler leaseGracefulShutdownHandler;
private long initialLeaseTableReadCapacity;
private long initialLeaseTableWriteCapacity;
protected final MetricsFactory metricsFactory;
private final Object shutdownLock = new Object();
-
+ private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig;
private ScheduledExecutorService leaseCoordinatorThreadPool;
+ private ScheduledFuture> leaseDiscoveryFuture;
private ScheduledFuture> takerFuture;
private volatile boolean running = false;
- /**
- * Constructor.
- *
- * NOTE: This constructor is deprecated and will be removed in a future release.
- *
- * @param leaseRefresher
- * LeaseRefresher instance to use
- * @param workerIdentifier
- * Identifies the worker (e.g. useful to track lease ownership)
- * @param leaseDurationMillis
- * Duration of a lease
- * @param epsilonMillis
- * Allow for some variance when calculating lease expirations
- * @param maxLeasesForWorker
- * Max leases this Worker can handle at a time
- * @param maxLeasesToStealAtOneTime
- * Steal up to these many leases at a time (for load balancing)
- * @param metricsFactory
- * Used to publish metrics about lease operations
- */
- @Deprecated
- public DynamoDBLeaseCoordinator(
- final LeaseRefresher leaseRefresher,
- final String workerIdentifier,
- final long leaseDurationMillis,
- final long epsilonMillis,
- final int maxLeasesForWorker,
- final int maxLeasesToStealAtOneTime,
- final int maxLeaseRenewerThreadCount,
- final MetricsFactory metricsFactory) {
- this(
- leaseRefresher,
- workerIdentifier,
- leaseDurationMillis,
- epsilonMillis,
- maxLeasesForWorker,
- maxLeasesToStealAtOneTime,
- maxLeaseRenewerThreadCount,
- TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY,
- TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY,
- metricsFactory);
- }
-
- /**
- * Constructor.
- *
- * @param leaseRefresher
- * LeaseRefresher instance to use
- * @param workerIdentifier
- * Identifies the worker (e.g. useful to track lease ownership)
- * @param leaseDurationMillis
- * Duration of a lease
- * @param epsilonMillis
- * Allow for some variance when calculating lease expirations
- * @param maxLeasesForWorker
- * Max leases this Worker can handle at a time
- * @param maxLeasesToStealAtOneTime
- * Steal up to these many leases at a time (for load balancing)
- * @param initialLeaseTableReadCapacity
- * Initial dynamodb lease table read iops if creating the lease table
- * @param initialLeaseTableWriteCapacity
- * Initial dynamodb lease table write iops if creating the lease table
- * @param metricsFactory
- * Used to publish metrics about lease operations
- */
- @Deprecated
- public DynamoDBLeaseCoordinator(
- final LeaseRefresher leaseRefresher,
- final String workerIdentifier,
- final long leaseDurationMillis,
- final long epsilonMillis,
- final int maxLeasesForWorker,
- final int maxLeasesToStealAtOneTime,
- final int maxLeaseRenewerThreadCount,
- final long initialLeaseTableReadCapacity,
- final long initialLeaseTableWriteCapacity,
- final MetricsFactory metricsFactory) {
- this(
- leaseRefresher,
- workerIdentifier,
- leaseDurationMillis,
- LeaseManagementConfig.DEFAULT_ENABLE_PRIORITY_LEASE_ASSIGNMENT,
- epsilonMillis,
- maxLeasesForWorker,
- maxLeasesToStealAtOneTime,
- maxLeaseRenewerThreadCount,
- TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY,
- TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY,
- metricsFactory);
- }
-
/**
* Constructor.
*
@@ -214,17 +140,35 @@ public DynamoDBLeaseCoordinator(
final int maxLeaseRenewerThreadCount,
final long initialLeaseTableReadCapacity,
final long initialLeaseTableWriteCapacity,
- final MetricsFactory metricsFactory) {
+ final MetricsFactory metricsFactory,
+ final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig,
+ final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig,
+ final ConcurrentMap shardInfoShardConsumerMap) {
this.leaseRefresher = leaseRefresher;
- this.leaseRenewalThreadpool = getLeaseRenewalExecutorService(maxLeaseRenewerThreadCount);
+ this.leaseRenewalThreadpool = createExecutorService(maxLeaseRenewerThreadCount, LEASE_RENEWAL_THREAD_FACTORY);
this.leaseTaker = new DynamoDBLeaseTaker(leaseRefresher, workerIdentifier, leaseDurationMillis, metricsFactory)
.withMaxLeasesForWorker(maxLeasesForWorker)
.withMaxLeasesToStealAtOneTime(maxLeasesToStealAtOneTime)
.withEnablePriorityLeaseAssignment(enablePriorityLeaseAssignment);
- this.leaseRenewer = new DynamoDBLeaseRenewer(
- leaseRefresher, workerIdentifier, leaseDurationMillis, leaseRenewalThreadpool, metricsFactory);
this.renewerIntervalMillis = getRenewerTakerIntervalMillis(leaseDurationMillis, epsilonMillis);
this.takerIntervalMillis = (leaseDurationMillis + epsilonMillis) * 2;
+ // Runs every leaseDurationMillis - epsilonMillis, so new leases are identified before they expire.
+ this.leaseDiscovererIntervalMillis = leaseDurationMillis - epsilonMillis;
+ this.leaseStatsRecorder = new LeaseStatsRecorder(renewerIntervalMillis, System::currentTimeMillis);
+ this.leaseGracefulShutdownHandler = LeaseGracefulShutdownHandler.create(
+ gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis(), shardInfoShardConsumerMap, this);
+ this.leaseRenewer = new DynamoDBLeaseRenewer(
+ leaseRefresher,
+ workerIdentifier,
+ leaseDurationMillis,
+ leaseRenewalThreadpool,
+ metricsFactory,
+ leaseStatsRecorder,
+ leaseGracefulShutdownHandler::enqueueShutdown);
+ this.leaseDiscoveryThreadPool =
+ createExecutorService(maxLeaseRenewerThreadCount, LEASE_DISCOVERY_THREAD_FACTORY);
+ this.leaseDiscoverer = new DynamoDBLeaseDiscoverer(
+ this.leaseRefresher, this.leaseRenewer, metricsFactory, workerIdentifier, leaseDiscoveryThreadPool);
if (initialLeaseTableReadCapacity <= 0) {
throw new IllegalArgumentException("readCapacity should be >= 1");
}
@@ -234,6 +178,7 @@ public DynamoDBLeaseCoordinator(
}
this.initialLeaseTableWriteCapacity = initialLeaseTableWriteCapacity;
this.metricsFactory = metricsFactory;
+ this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig;
log.info(
"With failover time {} ms and epsilon {} ms, LeaseCoordinator will renew leases every {} ms, take"
@@ -246,11 +191,49 @@ public DynamoDBLeaseCoordinator(
maxLeasesToStealAtOneTime);
}
+ @RequiredArgsConstructor
+ private class LeaseDiscoveryRunnable implements Runnable {
+ private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
+
+ /**
+ * Discovers new leases and registers them with the lease renewer. Does nothing unless the mode is
+ * WORKER_UTILIZATION_AWARE_ASSIGNMENT and the coordinator is running; exceptions are logged, not rethrown.
+ */
+ @Override
+ public void run() {
+ try {
+ synchronized (shutdownLock) {
+ final boolean utilizationAwareMode = leaseAssignmentModeProvider
+ .getLeaseAssignmentMode()
+ .equals(MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode
+ .WORKER_UTILIZATION_AWARE_ASSIGNMENT);
+ if (utilizationAwareMode && running) {
+ leaseRenewer.addLeasesToRenew(leaseDiscoverer.discoverNewLeases());
+ }
+ }
+ } catch (Exception discoveryFailure) {
+ log.error("Failed to execute lease discovery", discoveryFailure);
+ }
+ }
+ }
+
+ @RequiredArgsConstructor
private class TakerRunnable implements Runnable {
+ private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
@Override
public void run() {
try {
+ // LeaseTaker is run in DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT mode only
+ synchronized (shutdownLock) {
+ if (!leaseAssignmentModeProvider
+ .getLeaseAssignmentMode()
+ .equals(
+ MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode
+ .DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT)) {
+ return;
+ }
+ }
runLeaseTaker();
} catch (LeasingException e) {
log.error("LeasingException encountered in lease taking thread", e);
@@ -290,18 +273,35 @@ public void initialize() throws ProvisionedThroughputException, DependencyExcept
}
@Override
- public void start() throws DependencyException, InvalidStateException, ProvisionedThroughputException {
+ public void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider)
+ throws DependencyException, InvalidStateException, ProvisionedThroughputException {
leaseRenewer.initialize();
+ // At most 3 tasks are scheduled on this pool - lease renewer, lease taker, lease discoverer - so 3 threads
+ leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(3, LEASE_COORDINATOR_THREAD_FACTORY);
+
+ // let them run without contention. During migration from KCLv2.x to KCLv3.x the lease assignment mode
+ // can change dynamically, so both assignment algorithms are started and
+ // leaseAssignmentModeProvider.getLeaseAssignmentMode() decides which one actually executes. For
+ // applications that start in, or have finished migrating to, KCLv3.x the mode is always
+ // WORKER_UTILIZATION_AWARE_ASSIGNMENT, so the KCLv2.x lease taker is not needed and is not scheduled.
+ if (leaseAssignmentModeProvider.dynamicModeChangeSupportNeeded()) {
+ // Taker runs with fixed DELAY because we want it to run slower in the event of performance degradation.
+ takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay(
+ new TakerRunnable(leaseAssignmentModeProvider), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS);
+ }
- // 2 because we know we'll have at most 2 concurrent tasks at a time.
- leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(2, LEASE_COORDINATOR_THREAD_FACTORY);
+ // Discovery is always scheduled; the runnable is a no-op outside WORKER_UTILIZATION_AWARE_ASSIGNMENT mode.
+ leaseDiscoveryFuture = leaseCoordinatorThreadPool.scheduleAtFixedRate(
+ new LeaseDiscoveryRunnable(leaseAssignmentModeProvider),
+ 0L,
+ leaseDiscovererIntervalMillis,
+ TimeUnit.MILLISECONDS);
- // Taker runs with fixed DELAY because we want it to run slower in the event of performance degredation.
- takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay(
- new TakerRunnable(), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS);
- // Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degredation.
+ // Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degradation.
leaseCoordinatorThreadPool.scheduleAtFixedRate(
new RenewerRunnable(), 0L, renewerIntervalMillis, TimeUnit.MILLISECONDS);
+
+ leaseGracefulShutdownHandler.start();
running = true;
}
@@ -383,6 +383,8 @@ public void stop() {
}
leaseRenewalThreadpool.shutdownNow();
+ leaseCoordinatorThreadPool.shutdownNow();
+ leaseGracefulShutdownHandler.stop();
synchronized (shutdownLock) {
leaseRenewer.clearCurrentlyHeldLeases();
running = false;
@@ -393,6 +395,14 @@ public void stop() {
public void stopLeaseTaker() {
if (takerFuture != null) {
takerFuture.cancel(false);
}
+ // takerFuture is only scheduled when dynamic mode change support is needed, whereas lease discovery
+ // is always scheduled by start(); guard on leaseDiscoveryFuture so this also works without a taker.
+ if (leaseDiscoveryFuture != null) {
+ leaseDiscoveryFuture.cancel(false);
+ // This method is called during worker graceful shutdown. Stop any further lease shutdowns
+ // so that they don't interrupt the worker shutdown.
+ leaseGracefulShutdownHandler.stop();
+ }
}
@@ -418,20 +424,15 @@ public boolean updateLease(
}
/**
- * Returns executor service that should be used for lease renewal.
+ * Returns an executor service whose threads are created by the given {@code threadFactory}.
* @param maximumPoolSize Maximum allowed thread pool size
- * @return Executor service that should be used for lease renewal.
+ * @return Executor service with a core size of max(maximumPoolSize / 4, 2) and a 60 second keep-alive
*/
- private static ExecutorService getLeaseRenewalExecutorService(int maximumPoolSize) {
+ private static ExecutorService createExecutorService(final int maximumPoolSize, final ThreadFactory threadFactory) {
int coreLeaseCount = Math.max(maximumPoolSize / 4, 2);
return new ThreadPoolExecutor(
- coreLeaseCount,
- maximumPoolSize,
- 60,
- TimeUnit.SECONDS,
- new LinkedTransferQueue<>(),
- LEASE_RENEWAL_THREAD_FACTORY);
+ coreLeaseCount, maximumPoolSize, 60, TimeUnit.SECONDS, new LinkedTransferQueue<>(), threadFactory);
}
@Override
@@ -472,6 +473,8 @@ public static ShardInfo convertLeaseToAssignment(final Lease lease) {
* {@inheritDoc}
*
* NOTE: This method is deprecated. Please set the initial capacity through the constructor.
+ *
+ * This is a method of the public lease coordinator interface.
*/
@Override
@Deprecated
@@ -487,6 +490,8 @@ public DynamoDBLeaseCoordinator initialLeaseTableReadCapacity(long readCapacity)
* {@inheritDoc}
*
* NOTE: This method is deprecated. Please set the initial capacity through the constructor.
+ *
+ * This is a method of the public lease coordinator interface.
*/
@Override
@Deprecated
@@ -497,4 +502,9 @@ public DynamoDBLeaseCoordinator initialLeaseTableWriteCapacity(long writeCapacit
initialLeaseTableWriteCapacity = writeCapacity;
return this;
}
+
+ @Override
+ public LeaseStatsRecorder leaseStatsRecorder() {
+ return leaseStatsRecorder;
+ }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseDiscoverer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseDiscoverer.java
new file mode 100644
index 000000000..9774001c4
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseDiscoverer.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package software.amazon.kinesis.leases.dynamodb;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+import java.util.stream.Collectors;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import software.amazon.kinesis.annotations.KinesisClientInternalApi;
+import software.amazon.kinesis.leases.Lease;
+import software.amazon.kinesis.leases.LeaseDiscoverer;
+import software.amazon.kinesis.leases.LeaseRefresher;
+import software.amazon.kinesis.leases.LeaseRenewer;
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
+import software.amazon.kinesis.metrics.MetricsFactory;
+import software.amazon.kinesis.metrics.MetricsLevel;
+import software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+
+import static java.util.Objects.isNull;
+
+/**
+ * An implementation of {@link LeaseDiscoverer} that uses {@link LeaseRefresher} to query
+ * {@link DynamoDBLeaseRefresher#LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME} for the leases assigned to the
+ * current worker, then filters them and returns only the leases that have not started processing (it uses
+ * {@link LeaseRenewer#getCurrentlyHeldLeases()} to determine which leases are currently held).
+ */
+@Slf4j
+@RequiredArgsConstructor
+@KinesisClientInternalApi
+public class DynamoDBLeaseDiscoverer implements LeaseDiscoverer {
+
+ private final LeaseRefresher leaseRefresher;
+ private final LeaseRenewer leaseRenewer;
+ private final MetricsFactory metricsFactory;
+ private final String workerIdentifier;
+ private final ExecutorService executorService;
+
+ @Override
+ public List discoverNewLeases()
+ throws ProvisionedThroughputException, InvalidStateException, DependencyException {
+ final MetricsScope metricsScope = MetricsUtil.createMetricsWithOperation(metricsFactory, "LeaseDiscovery");
+ long startTime = System.currentTimeMillis();
+ boolean success = false;
+ try {
+ final Set currentHeldLeaseKeys =
+ leaseRenewer.getCurrentlyHeldLeases().keySet();
+
+ final long listLeaseKeysForWorkerStartTime = System.currentTimeMillis();
+ final List