From a57238024e9f77a1c7d77e5451e01a79a582606b Mon Sep 17 00:00:00 2001 From: codenohup Date: Mon, 30 Dec 2024 15:33:44 +0800 Subject: [PATCH] [CELEBORN-1801] Remove outdated flink 1.14 and 1.15 ### What changes were proposed in this pull request? Remove outdated flink 1.14 and 1.15. For more information, please see the discussion thread: https://lists.apache.org/thread/njho00zmkjx5qspcrbrkogy8s4zzmwv9 ### Why are the changes needed? Reduce maintenance burden. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Changes can be covered by existing tests. Closes #3029 from codenohup/remove-flink14and15. Authored-by: codenohup Signed-off-by: Cheng Pan --- .github/workflows/deps.yml | 4 - .github/workflows/license.yml | 2 - .github/workflows/maven.yml | 2 - .github/workflows/sbt.yml | 2 - .github/workflows/style.yml | 2 - README.md | 10 +- build/make-distribution.sh | 4 - build/release/release.sh | 6 - client-flink/flink-1.14-shaded/pom.xml | 137 ---- .../src/main/resources/META-INF/LICENSE | 248 ------- .../src/main/resources/META-INF/NOTICE | 45 -- .../META-INF/licenses/LICENSE-protobuf.txt | 42 -- client-flink/flink-1.14/pom.xml | 75 --- .../flink/RemoteShuffleEnvironment.java | 138 ---- .../plugin/flink/RemoteShuffleInputGate.java | 270 -------- .../flink/RemoteShuffleInputGateFactory.java | 57 -- .../flink/RemoteShuffleResultPartition.java | 218 ------ .../RemoteShuffleResultPartitionFactory.java | 85 --- .../flink/RemoteShuffleServiceFactory.java | 63 -- .../flink/SimpleResultPartitionAdapter.java | 28 - .../netty/NettyShuffleEnvironmentWrapper.java | 95 --- .../flink/RemoteShuffleMasterSuiteJ.java | 351 ---------- ...teShuffleResultPartitionFactorySuiteJ.java | 55 -- .../RemoteShuffleResultPartitionSuiteJ.java | 618 ------------------ .../RemoteShuffleServiceFactorySuiteJ.java | 58 -- .../flink/ShuffleResourceTrackerSuiteJ.java | 138 ---- .../src/test/resources/log4j2-test.xml | 41 -- 
client-flink/flink-1.15-shaded/pom.xml | 137 ---- .../src/main/resources/META-INF/LICENSE | 248 ------- .../src/main/resources/META-INF/NOTICE | 45 -- .../META-INF/licenses/LICENSE-protobuf.txt | 42 -- client-flink/flink-1.15/pom.xml | 75 --- .../flink/RemoteShuffleEnvironment.java | 139 ---- .../plugin/flink/RemoteShuffleInputGate.java | 287 -------- .../flink/RemoteShuffleInputGateFactory.java | 57 -- .../flink/RemoteShuffleResultPartition.java | 220 ------- .../RemoteShuffleResultPartitionFactory.java | 86 --- .../flink/RemoteShuffleServiceFactory.java | 63 -- .../flink/SimpleResultPartitionAdapter.java | 28 - .../netty/NettyShuffleEnvironmentWrapper.java | 95 --- .../flink/RemoteShuffleMasterSuiteJ.java | 351 ---------- ...teShuffleResultPartitionFactorySuiteJ.java | 55 -- .../RemoteShuffleResultPartitionSuiteJ.java | 618 ------------------ .../RemoteShuffleServiceFactorySuiteJ.java | 58 -- .../flink/ShuffleResourceTrackerSuiteJ.java | 138 ---- dev/dependencies.sh | 8 - dev/deps/dependencies-client-flink-1.14 | 81 --- dev/deps/dependencies-client-flink-1.15 | 81 --- dev/reformat | 2 - docs/README.md | 6 +- docs/deploy.md | 4 +- docs/developers/overview.md | 2 +- docs/developers/sbt.md | 26 +- docs/migration.md | 2 + pom.xml | 41 +- project/CelebornBuild.scala | 28 - .../celeborn/tests/flink/FlinkVersion.java | 6 - 57 files changed, 25 insertions(+), 5798 deletions(-) delete mode 100644 client-flink/flink-1.14-shaded/pom.xml delete mode 100644 client-flink/flink-1.14-shaded/src/main/resources/META-INF/LICENSE delete mode 100644 client-flink/flink-1.14-shaded/src/main/resources/META-INF/NOTICE delete mode 100644 client-flink/flink-1.14-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt delete mode 100644 client-flink/flink-1.14/pom.xml delete mode 100644 client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleEnvironment.java delete mode 100644 
client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGate.java delete mode 100644 client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGateFactory.java delete mode 100644 client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartition.java delete mode 100644 client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactory.java delete mode 100644 client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactory.java delete mode 100644 client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/SimpleResultPartitionAdapter.java delete mode 100644 client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/netty/NettyShuffleEnvironmentWrapper.java delete mode 100644 client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleMasterSuiteJ.java delete mode 100644 client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactorySuiteJ.java delete mode 100644 client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionSuiteJ.java delete mode 100644 client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactorySuiteJ.java delete mode 100644 client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/ShuffleResourceTrackerSuiteJ.java delete mode 100644 client-flink/flink-1.14/src/test/resources/log4j2-test.xml delete mode 100644 client-flink/flink-1.15-shaded/pom.xml delete mode 100644 client-flink/flink-1.15-shaded/src/main/resources/META-INF/LICENSE delete mode 100644 client-flink/flink-1.15-shaded/src/main/resources/META-INF/NOTICE delete mode 100644 client-flink/flink-1.15-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt delete mode 100644 client-flink/flink-1.15/pom.xml delete mode 100644 
client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleEnvironment.java delete mode 100644 client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGate.java delete mode 100644 client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGateFactory.java delete mode 100644 client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartition.java delete mode 100644 client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactory.java delete mode 100644 client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactory.java delete mode 100644 client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/SimpleResultPartitionAdapter.java delete mode 100644 client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/netty/NettyShuffleEnvironmentWrapper.java delete mode 100644 client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleMasterSuiteJ.java delete mode 100644 client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactorySuiteJ.java delete mode 100644 client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionSuiteJ.java delete mode 100644 client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactorySuiteJ.java delete mode 100644 client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/ShuffleResourceTrackerSuiteJ.java delete mode 100644 dev/deps/dependencies-client-flink-1.14 delete mode 100644 dev/deps/dependencies-client-flink-1.15 diff --git a/.github/workflows/deps.yml b/.github/workflows/deps.yml index 239d2af0c44..6d85952c2b2 100644 --- a/.github/workflows/deps.yml +++ b/.github/workflows/deps.yml @@ -48,8 +48,6 @@ jobs: - 'spark-3.3' - 'spark-3.4' - 'spark-3.5' - - 'flink-1.14' - - 
'flink-1.15' - 'flink-1.16' - 'flink-1.17' - 'flink-1.18' @@ -82,8 +80,6 @@ jobs: - 'spark-3.3' - 'spark-3.4' - 'spark-3.5' - - 'flink-1.14' - - 'flink-1.15' - 'flink-1.16' - 'flink-1.17' - 'flink-1.18' diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml index ce69136975c..83db2af9665 100644 --- a/.github/workflows/license.yml +++ b/.github/workflows/license.yml @@ -43,8 +43,6 @@ jobs: cache: 'maven' check-latest: false - run: | - build/mvn org.apache.rat:apache-rat-plugin:check -Pgoogle-mirror,flink-1.14 - build/mvn org.apache.rat:apache-rat-plugin:check -Pgoogle-mirror,flink-1.15 build/mvn org.apache.rat:apache-rat-plugin:check -Pgoogle-mirror,flink-1.16 build/mvn org.apache.rat:apache-rat-plugin:check -Pgoogle-mirror,flink-1.17 build/mvn org.apache.rat:apache-rat-plugin:check -Pgoogle-mirror,flink-1.18 diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 7aeb0e45c38..f23b7f39913 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -159,8 +159,6 @@ jobs: - 8 - 11 flink: - - '1.14' - - '1.15' - '1.16' - '1.17' - '1.18' diff --git a/.github/workflows/sbt.yml b/.github/workflows/sbt.yml index 8680de6285f..8dcc64fa8f0 100644 --- a/.github/workflows/sbt.yml +++ b/.github/workflows/sbt.yml @@ -205,8 +205,6 @@ jobs: - 8 - 11 flink: - - '1.14' - - '1.15' - '1.16' - '1.17' - '1.18' diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 880f24e9389..a24052e1069 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -47,8 +47,6 @@ jobs: cache: 'maven' check-latest: false - run: | - build/mvn spotless:check -Pgoogle-mirror,flink-1.14 - build/mvn spotless:check -Pgoogle-mirror,flink-1.15 build/mvn spotless:check -Pgoogle-mirror,flink-1.16 build/mvn spotless:check -Pgoogle-mirror,flink-1.17 build/mvn spotless:check -Pgoogle-mirror,flink-1.18 diff --git a/README.md b/README.md index 5305181fc9f..a1d91b5a29f 100644 --- a/README.md +++ b/README.md @@ -41,12 +41,12 
@@ Celeborn Worker's slot count is decided by `total usable disk size / average shu Celeborn worker's slot count decreases when a partition is allocated and increments when a partition is freed. ## Build -1. Celeborn supports Spark 2.4/3.0/3.1/3.2/3.3/3.4/3.5, Flink 1.14/1.15/1.16/1.17/1.18/1.19/1.20 and Hadoop MapReduce 2/3. +1. Celeborn supports Spark 2.4/3.0/3.1/3.2/3.3/3.4/3.5, Flink 1.16/1.17/1.18/1.19/1.20 and Hadoop MapReduce 2/3. 2. Celeborn tested under Scala 2.11/2.12/2.13 and Java 8/11/17 environment. Build Celeborn via `make-distribution.sh`: ```shell -./build/make-distribution.sh -Pspark-2.4/-Pspark-3.0/-Pspark-3.1/-Pspark-3.2/-Pspark-3.3/-Pspark-3.4/-Pspark-3.5/-Pflink-1.14/-Pflink-1.15/-Pflink-1.16/-Pflink-1.17/-Pflink-1.18/-Pflink-1.19/-Pflink-1.20/-Pmr +./build/make-distribution.sh -Pspark-2.4/-Pspark-3.0/-Pspark-3.1/-Pspark-3.2/-Pspark-3.3/-Pspark-3.4/-Pspark-3.5/-Pflink-1.16/-Pflink-1.17/-Pflink-1.18/-Pflink-1.19/-Pflink-1.20/-Pmr ``` Package `apache-celeborn-${project.version}-bin.tgz` will be generated. @@ -62,8 +62,6 @@ Package `apache-celeborn-${project.version}-bin.tgz` will be generated. | Spark 3.3 | ❌ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | | Spark 3.4 | ❌ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | | Spark 3.5 | ❌ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| Flink 1.14 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | -| Flink 1.15 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | | Flink 1.16 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | | Flink 1.17 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | | Flink 1.18 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | @@ -114,7 +112,7 @@ Celeborn server is compatible with all clients inside various engines. However, Celeborn clients must be consistent with the version of the specified engine. For example, if you are running Spark 2.4, you must compile Celeborn client with -Pspark-2.4; if you are running Spark 3.2, you must compile Celeborn client with -Pspark-3.2; -if you are running flink 1.14, you must compile Celeborn client with -Pflink-1.14. +if you are running flink 1.16, you must compile Celeborn client with -Pflink-1.16. 
## Usage Celeborn cluster composes of Master and Worker nodes, the Master supports both single and HA mode(Raft-based) deployments. @@ -318,7 +316,7 @@ spark.executor.userClassPathFirst false Copy `$CELEBORN_HOME/flink/*.jar` to `$FLINK_HOME/lib/`. #### Flink Configuration -Celeborn supports two Flink integration strategies: remote shuffle service (since Flink 1.14) and [hybrid shuffle](https://nightlies.apache.org/flink/flink-docs-stable/docs/ops/batch/batch_shuffle/#hybrid-shuffle) (since Flink 1.20). +Celeborn supports two Flink integration strategies: remote shuffle service (since Flink 1.16) and [hybrid shuffle](https://nightlies.apache.org/flink/flink-docs-stable/docs/ops/batch/batch_shuffle/#hybrid-shuffle) (since Flink 1.20). To use Celeborn, you can choose one of them and add the following Flink configurations. diff --git a/build/make-distribution.sh b/build/make-distribution.sh index d49eab487ef..8bc42d4793d 100755 --- a/build/make-distribution.sh +++ b/build/make-distribution.sh @@ -342,8 +342,6 @@ if [ "$SBT_ENABLED" == "true" ]; then sbt_build_client -Pspark-2.4 sbt_build_client -Pspark-3.4 sbt_build_client -Pspark-3.5 - sbt_build_client -Pflink-1.14 - sbt_build_client -Pflink-1.15 sbt_build_client -Pflink-1.16 sbt_build_client -Pflink-1.17 sbt_build_client -Pflink-1.18 @@ -378,8 +376,6 @@ else build_spark_client -Pspark-2.4 build_spark_client -Pspark-3.4 build_spark_client -Pspark-3.5 - build_flink_client -Pflink-1.14 - build_flink_client -Pflink-1.15 build_flink_client -Pflink-1.16 build_flink_client -Pflink-1.17 build_flink_client -Pflink-1.18 diff --git a/build/release/release.sh b/build/release/release.sh index 6030b8b7c3b..ff476cf016f 100755 --- a/build/release/release.sh +++ b/build/release/release.sh @@ -104,12 +104,6 @@ upload_nexus_staging() { echo "Deploying celeborn-client-spark-3-shaded_2.13" ${PROJECT_DIR}/build/sbt -Pspark-3.4 ++2.13 "clean;celeborn-client-spark-3-shaded/publishSigned" - echo "Deploying 
celeborn-client-flink-1.14-shaded_2.12" - ${PROJECT_DIR}/build/sbt -Pflink-1.14 "clean;celeborn-client-flink-1_14-shaded/publishSigned" - - echo "Deploying celeborn-client-flink-1.15-shaded_2.12" - ${PROJECT_DIR}/build/sbt -Pflink-1.15 "clean;celeborn-client-flink-1_15-shaded/publishSigned" - echo "Deploying celeborn-client-flink-1.16-shaded_2.12" ${PROJECT_DIR}/build/sbt -Pflink-1.16 "clean;celeborn-client-flink-1_16-shaded/publishSigned" diff --git a/client-flink/flink-1.14-shaded/pom.xml b/client-flink/flink-1.14-shaded/pom.xml deleted file mode 100644 index 29e56cd4267..00000000000 --- a/client-flink/flink-1.14-shaded/pom.xml +++ /dev/null @@ -1,137 +0,0 @@ - - - - 4.0.0 - - org.apache.celeborn - celeborn-parent_${scala.binary.version} - ${project.version} - ../../pom.xml - - - celeborn-client-flink-1.14-shaded_${scala.binary.version} - jar - Celeborn Shaded Client for Flink 1.14 - - - - org.apache.celeborn - celeborn-client-flink-1.14_${scala.binary.version} - ${project.version} - - - - - - - org.apache.maven.plugins - maven-shade-plugin - - - - com.google.protobuf - ${shading.prefix}.com.google.protobuf - - - com.google.common - ${shading.prefix}.com.google.common - - - io.netty - ${shading.prefix}.io.netty - - - org.apache.commons - ${shading.prefix}.org.apache.commons - - - org.roaringbitmap - ${shading.prefix}.org.roaringbitmap - - - - - org.apache.celeborn:* - com.google.protobuf:protobuf-java - com.google.guava:guava - com.google.guava:failureaccess - io.netty:* - org.apache.commons:commons-lang3 - org.roaringbitmap:RoaringBitmap - - - - - *:* - - **/*.proto - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - **/log4j.properties - META-INF/LICENSE.txt - META-INF/NOTICE.txt - LICENSE.txt - NOTICE.txt - - - - - - - - - - org.apache.maven.plugins - maven-antrun-plugin - ${maven.plugin.antrun.version} - - - rename-native-library - - run - - package - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git 
a/client-flink/flink-1.14-shaded/src/main/resources/META-INF/LICENSE b/client-flink/flink-1.14-shaded/src/main/resources/META-INF/LICENSE deleted file mode 100644 index 924ef2c85f4..00000000000 --- a/client-flink/flink-1.14-shaded/src/main/resources/META-INF/LICENSE +++ /dev/null @@ -1,248 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- - ------------------------------------------------------------------------------------- -This project bundles the following dependencies under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0.txt): - - -Apache License 2.0 --------------------------------------- - -com.google.guava:failureaccess -com.google.guava:guava -io.netty:netty-all -io.netty:netty-buffer -io.netty:netty-codec -io.netty:netty-codec-dns -io.netty:netty-codec-haproxy -io.netty:netty-codec-http -io.netty:netty-codec-http2 -io.netty:netty-codec-memcache -io.netty:netty-codec-mqtt -io.netty:netty-codec-redis -io.netty:netty-codec-smtp -io.netty:netty-codec-socks -io.netty:netty-codec-stomp -io.netty:netty-codec-xml -io.netty:netty-common -io.netty:netty-handler -io.netty:netty-handler-proxy -io.netty:netty-resolver -io.netty:netty-resolver-dns -io.netty:netty-transport -io.netty:netty-transport-classes-epoll -io.netty:netty-transport-classes-kqueue -io.netty:netty-transport-native-epoll -io.netty:netty-transport-native-kqueue -io.netty:netty-transport-native-unix-common -io.netty:netty-transport-rxtx -io.netty:netty-transport-sctp -io.netty:netty-transport-udt -org.apache.commons:commons-lang3 -org.roaringbitmap:RoaringBitmap - - -BSD 3-clause ------------- -See licenses/LICENSE-protobuf.txt for details. -com.google.protobuf:protobuf-java diff --git a/client-flink/flink-1.14-shaded/src/main/resources/META-INF/NOTICE b/client-flink/flink-1.14-shaded/src/main/resources/META-INF/NOTICE deleted file mode 100644 index 43452a38afe..00000000000 --- a/client-flink/flink-1.14-shaded/src/main/resources/META-INF/NOTICE +++ /dev/null @@ -1,45 +0,0 @@ - -Apache Celeborn -Copyright 2022-2024 The Apache Software Foundation. - -This product includes software developed at -The Apache Software Foundation (https://www.apache.org/). 
- -Apache Spark -Copyright 2014 and onwards The Apache Software Foundation - -Apache Kyuubi -Copyright 2021-2023 The Apache Software Foundation - -Apache Iceberg -Copyright 2017-2022 The Apache Software Foundation - -Apache Parquet MR -Copyright 2014-2024 The Apache Software Foundation - -This project includes code from Kite, developed at Cloudera, Inc. with -the following copyright notice: - -| Copyright 2013 Cloudera Inc. -| -| Licensed under the Apache License, Version 2.0 (the "License"); -| you may not use this file except in compliance with the License. -| You may obtain a copy of the License at -| -| http://www.apache.org/licenses/LICENSE-2.0 -| -| Unless required by applicable law or agreed to in writing, software -| distributed under the License is distributed on an "AS IS" BASIS, -| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -| See the License for the specific language governing permissions and -| limitations under the License. - -Remote Shuffle Service for Flink -Copyright 2021 The Flink Remote Shuffle Project - -============================================================================= -= NOTICE file corresponding to section 4d of the Apache License Version 2.0 = -============================================================================= - -Apache Commons Lang -Copyright 2001-2021 The Apache Software Foundation diff --git a/client-flink/flink-1.14-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt b/client-flink/flink-1.14-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt deleted file mode 100644 index b4350ec83c7..00000000000 --- a/client-flink/flink-1.14-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt +++ /dev/null @@ -1,42 +0,0 @@ -This license applies to all parts of Protocol Buffers except the following: - - - Atomicops support for generic gcc, located in - src/google/protobuf/stubs/atomicops_internals_generic_gcc.h. - This file is copyrighted by Red Hat Inc. 
- - - Atomicops support for AIX/POWER, located in - src/google/protobuf/stubs/atomicops_internals_aix.h. - This file is copyrighted by Bloomberg Finance LP. - -Copyright 2014, Google Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Code generated by the Protocol Buffer compiler is owned by the owner -of the input file used when generating it. This code is not -standalone and requires a support library to be linked with it. This -support library is itself covered by the above license. 
\ No newline at end of file diff --git a/client-flink/flink-1.14/pom.xml b/client-flink/flink-1.14/pom.xml deleted file mode 100644 index d8a09d3829c..00000000000 --- a/client-flink/flink-1.14/pom.xml +++ /dev/null @@ -1,75 +0,0 @@ - - - - 4.0.0 - - org.apache.celeborn - celeborn-parent_${scala.binary.version} - ${project.version} - ../../pom.xml - - - celeborn-client-flink-1.14_${scala.binary.version} - jar - Celeborn Client for Flink 1.14 - - - - org.apache.celeborn - celeborn-common_${scala.binary.version} - ${project.version} - - - org.apache.celeborn - celeborn-client_${scala.binary.version} - ${project.version} - - - org.apache.celeborn - celeborn-client-flink-common_${scala.binary.version} - ${project.version} - - - org.apache.flink - flink-runtime - provided - - - org.xerial.snappy - snappy-java - - - - - - org.mockito - mockito-core - test - - - org.apache.logging.log4j - log4j-slf4j-impl - test - - - org.apache.logging.log4j - log4j-1.2-api - test - - - diff --git a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleEnvironment.java b/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleEnvironment.java deleted file mode 100644 index 60b0e020d6b..00000000000 --- a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleEnvironment.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import java.util.Collection; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Collectors; - -import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor; -import org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor; -import org.apache.flink.runtime.io.network.NettyShuffleEnvironment; -import org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter; -import org.apache.flink.runtime.io.network.buffer.NetworkBufferPool; -import org.apache.flink.runtime.io.network.metrics.InputChannelMetrics; -import org.apache.flink.runtime.io.network.partition.PartitionProducerStateProvider; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionManager; -import org.apache.flink.runtime.io.network.partition.consumer.IndexedInputGate; -import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; -import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID; -import org.apache.flink.runtime.shuffle.ShuffleEnvironment; -import org.apache.flink.runtime.shuffle.ShuffleIOOwnerContext; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.plugin.flink.netty.NettyShuffleEnvironmentWrapper; - -/** - * The implementation of {@link ShuffleEnvironment} based on the remote shuffle service, providing - * shuffle environment on flink TM side. 
- */ -public class RemoteShuffleEnvironment extends AbstractRemoteShuffleEnvironment - implements ShuffleEnvironment { - - /** Factory class to create {@link RemoteShuffleResultPartition}. */ - private final RemoteShuffleResultPartitionFactory resultPartitionFactory; - - private final RemoteShuffleInputGateFactory inputGateFactory; - - private final NettyShuffleEnvironmentWrapper shuffleEnvironmentWrapper; - - private final ConcurrentHashMap.KeySetView nettyResultIds = - ConcurrentHashMap.newKeySet(); - - private final ConcurrentHashMap.KeySetView - nettyResultPartitionIds = ConcurrentHashMap.newKeySet(); - - /** - * @param networkBufferPool Network buffer pool for shuffle read and shuffle write. - * @param resultPartitionManager A trivial {@link ResultPartitionManager}. - * @param resultPartitionFactory Factory class to create {@link RemoteShuffleResultPartition}. - * @param inputGateFactory Factory class to create {@link RemoteShuffleInputGate}. - * @param shuffleEnvironmentWrapper Wrapper class to create {@link NettyShuffleEnvironment}. 
- */ - public RemoteShuffleEnvironment( - NetworkBufferPool networkBufferPool, - ResultPartitionManager resultPartitionManager, - RemoteShuffleResultPartitionFactory resultPartitionFactory, - RemoteShuffleInputGateFactory inputGateFactory, - CelebornConf conf, - NettyShuffleEnvironmentWrapper shuffleEnvironmentWrapper) { - super(networkBufferPool, resultPartitionManager, conf); - this.resultPartitionFactory = resultPartitionFactory; - this.inputGateFactory = inputGateFactory; - this.shuffleEnvironmentWrapper = shuffleEnvironmentWrapper; - } - - @Override - public ResultPartitionWriter createResultPartitionWriterInternal( - ShuffleIOOwnerContext ownerContext, - int index, - ResultPartitionDeploymentDescriptor resultPartitionDeploymentDescriptor, - CelebornConf conf) { - if (resultPartitionDeploymentDescriptor.getShuffleDescriptor() - instanceof RemoteShuffleDescriptor) { - return resultPartitionFactory.create( - ownerContext.getOwnerName(), index, resultPartitionDeploymentDescriptor, conf); - } else { - nettyResultIds.add(resultPartitionDeploymentDescriptor.getResultId()); - nettyResultPartitionIds.add(resultPartitionDeploymentDescriptor.getPartitionId()); - return shuffleEnvironmentWrapper - .nettyResultPartitionFactory() - .create(ownerContext.getOwnerName(), index, resultPartitionDeploymentDescriptor); - } - } - - @Override - IndexedInputGate createInputGateInternal( - ShuffleIOOwnerContext ownerContext, - PartitionProducerStateProvider producerStateProvider, - int gateIndex, - InputGateDeploymentDescriptor igdd) { - return nettyResultIds.contains(igdd.getConsumedResultId()) - ? 
shuffleEnvironmentWrapper - .nettyInputGateFactory() - .create( - ownerContext.getOwnerName(), - gateIndex, - igdd, - producerStateProvider, - new InputChannelMetrics( - ownerContext.getInputGroup(), ownerContext.getParentGroup())) - : inputGateFactory.create(ownerContext.getOwnerName(), gateIndex, igdd); - } - - public void releasePartitionsLocally(Collection partitionIds) { - List resultPartitionIds = - partitionIds.stream() - .filter(partitionId -> nettyResultPartitionIds.contains(partitionId.getPartitionId())) - .collect(Collectors.toList()); - if (!resultPartitionIds.isEmpty()) { - shuffleEnvironmentWrapper - .nettyShuffleEnvironment() - .releasePartitionsLocally(resultPartitionIds); - } - } - - @VisibleForTesting - RemoteShuffleResultPartitionFactory getResultPartitionFactory() { - return resultPartitionFactory; - } -} diff --git a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGate.java b/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGate.java deleted file mode 100644 index 284e45818db..00000000000 --- a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGate.java +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import java.io.IOException; -import java.net.InetSocketAddress; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.CompletableFuture; - -import org.apache.flink.core.memory.MemorySegment; -import org.apache.flink.core.memory.MemorySegmentProvider; -import org.apache.flink.metrics.SimpleCounter; -import org.apache.flink.runtime.checkpoint.CheckpointOptions; -import org.apache.flink.runtime.checkpoint.channel.ChannelStateWriter; -import org.apache.flink.runtime.checkpoint.channel.InputChannelInfo; -import org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo; -import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor; -import org.apache.flink.runtime.event.TaskEvent; -import org.apache.flink.runtime.io.network.ConnectionID; -import org.apache.flink.runtime.io.network.LocalConnectionManager; -import org.apache.flink.runtime.io.network.api.CheckpointBarrier; -import org.apache.flink.runtime.io.network.buffer.Buffer; -import org.apache.flink.runtime.io.network.buffer.BufferDecompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; -import org.apache.flink.runtime.io.network.partition.consumer.*; -import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; -import org.apache.flink.util.CloseableIterator; -import org.apache.flink.util.FlinkRuntimeException; -import org.apache.flink.util.function.SupplierWithException; - -import org.apache.celeborn.common.CelebornConf; - -/** A {@link IndexedInputGate} which ingest data from remote shuffle workers. 
*/ -public class RemoteShuffleInputGate extends IndexedInputGate { - - RemoteShuffleInputGateDelegation inputGateDelegation; - - public RemoteShuffleInputGate( - CelebornConf celebornConf, - String taskName, - int gateIndex, - InputGateDeploymentDescriptor gateDescriptor, - SupplierWithException bufferPoolFactory, - BufferDecompressor bufferDecompressor, - int numConcurrentReading) { - inputGateDelegation = - new RemoteShuffleInputGateDelegation( - celebornConf, - taskName, - gateIndex, - gateDescriptor, - bufferPoolFactory, - bufferDecompressor, - numConcurrentReading, - availabilityHelper, - gateDescriptor.getConsumedSubpartitionIndex(), - gateDescriptor.getConsumedSubpartitionIndex()); - } - - /** Setup gate and build network connections. */ - @Override - public void setup() throws IOException { - inputGateDelegation.setup(); - } - - /** Index of the gate of the corresponding computing task. */ - @Override - public int getGateIndex() { - return inputGateDelegation.getGateIndex(); - } - - /** Get number of input channels. A channel is a data flow from one shuffle worker. */ - @Override - public int getNumberOfInputChannels() { - return inputGateDelegation.getBufferReaders().size(); - } - - /** Whether reading is finished -- all channels are finished and cached buffers are drained. */ - @Override - public boolean isFinished() { - return inputGateDelegation.isFinished(); - } - - @Override - public boolean hasReceivedEndOfData() { - return inputGateDelegation.hasReceivedEndOfData(); - } - - @Override - public Optional getNext() { - throw new UnsupportedOperationException("Not implemented (DataSet API is not supported)."); - } - - /** Poll a received {@link BufferOrEvent}. */ - @Override - public Optional pollNext() throws IOException { - return inputGateDelegation.pollNext(); - } - - /** Close all reading channels inside this {@link RemoteShuffleInputGate}. 
*/ - @Override - public void close() throws Exception { - inputGateDelegation.close(); - } - - /** Get {@link InputChannelInfo}s of this {@link RemoteShuffleInputGate}. */ - @Override - public List getChannelInfos() { - return inputGateDelegation.getChannelsInfo(); - } - - @Override - public void requestPartitions() { - // do-nothing - } - - @Override - public void checkpointStarted(CheckpointBarrier barrier) { - // do-nothing. - } - - @Override - public void checkpointStopped(long cancelledCheckpointId) { - // do-nothing. - } - - @Override - public int getBuffersInUseCount() { - return 0; - } - - @Override - public void announceBufferSize(int i) {} - - @Override - public List getUnfinishedChannels() { - return Collections.emptyList(); - } - - @Override - public void finishReadRecoveredState() { - // do-nothing. - } - - @Override - public InputChannel getChannel(int channelIndex) { - return new FakedRemoteInputChannel(channelIndex); - } - - @Override - public void sendTaskEvent(TaskEvent event) { - throw new FlinkRuntimeException("Method should not be called."); - } - - @Override - public void resumeConsumption(InputChannelInfo channelInfo) { - throw new FlinkRuntimeException("Method should not be called."); - } - - @Override - public void acknowledgeAllRecordsProcessed(InputChannelInfo inputChannelInfo) {} - - @Override - public CompletableFuture getStateConsumedFuture() { - return CompletableFuture.completedFuture(null); - } - - @Override - public String toString() { - return String.format( - "ReadGate [owning task: %s, gate index: %d, descriptor: %s]", - inputGateDelegation.getTaskName(), - inputGateDelegation.getGateIndex(), - inputGateDelegation.getGateDescriptor().toString()); - } - - /** Accommodation for the incompleteness of Flink pluggable shuffle service. 
*/ - private class FakedRemoteInputChannel extends RemoteInputChannel { - FakedRemoteInputChannel(int channelIndex) { - super( - new SingleInputGate( - inputGateDelegation.getTaskName(), - inputGateDelegation.getGateIndex(), - new IntermediateDataSetID(), - ResultPartitionType.BLOCKING, - 0, - 1, - (a, b, c) -> {}, - () -> null, - null, - new FakedMemorySegmentProvider(), - 0), - channelIndex, - new ResultPartitionID(), - new ConnectionID(new InetSocketAddress("", 0), 0), - new LocalConnectionManager(), - 0, - 0, - 0, - new SimpleCounter(), - new SimpleCounter(), - new FakedChannelStateWriter()); - } - } - - /** Accommodation for the incompleteness of Flink pluggable shuffle service. */ - private static class FakedMemorySegmentProvider implements MemorySegmentProvider { - - @Override - public Collection requestUnpooledMemorySegments(int i) throws IOException { - return null; - } - - @Override - public void recycleUnpooledMemorySegments(Collection collection) - throws IOException {} - } - - /** Accommodation for the incompleteness of Flink pluggable shuffle service. */ - private static class FakedChannelStateWriter implements ChannelStateWriter { - - @Override - public void start(long cpId, CheckpointOptions checkpointOptions) {} - - @Override - public void addInputData( - long cpId, InputChannelInfo info, int startSeqNum, CloseableIterator data) {} - - @Override - public void addOutputData( - long cpId, ResultSubpartitionInfo info, int startSeqNum, Buffer... 
data) {} - - @Override - public void finishInput(long checkpointId) {} - - @Override - public void finishOutput(long checkpointId) {} - - @Override - public void abort(long checkpointId, Throwable cause, boolean cleanup) {} - - @Override - public ChannelStateWriteResult getAndRemoveWriteResult(long checkpointId) { - return null; - } - - @Override - public void close() {} - } -} diff --git a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGateFactory.java b/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGateFactory.java deleted file mode 100644 index d1402c3866e..00000000000 --- a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGateFactory.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.celeborn.plugin.flink; - -import java.io.IOException; - -import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor; -import org.apache.flink.runtime.io.network.buffer.BufferDecompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import org.apache.flink.runtime.io.network.buffer.NetworkBufferPool; -import org.apache.flink.util.function.SupplierWithException; - -import org.apache.celeborn.common.CelebornConf; - -/** Factory class to create {@link RemoteShuffleInputGate}. */ -public class RemoteShuffleInputGateFactory extends AbstractRemoteShuffleInputGateFactory { - - public RemoteShuffleInputGateFactory( - CelebornConf conf, NetworkBufferPool networkBufferPool, int networkBufferSize) { - super(conf, networkBufferPool, networkBufferSize); - } - - @Override - protected RemoteShuffleInputGate createInputGate( - String owningTaskName, - int gateIndex, - InputGateDeploymentDescriptor igdd, - SupplierWithException bufferPoolFactory, - String compressionCodec) { - BufferDecompressor bufferDecompressor = - new BufferDecompressor(networkBufferSize, compressionCodec); - return new RemoteShuffleInputGate( - celebornConf, - owningTaskName, - gateIndex, - igdd, - bufferPoolFactory, - bufferDecompressor, - numConcurrentReading); - } -} diff --git a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartition.java b/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartition.java deleted file mode 100644 index 286a7677f33..00000000000 --- a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartition.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import static org.apache.celeborn.plugin.flink.utils.Utils.checkNotNull; -import static org.apache.celeborn.plugin.flink.utils.Utils.checkState; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.concurrent.CompletableFuture; - -import javax.annotation.Nullable; - -import com.google.common.annotations.VisibleForTesting; -import org.apache.flink.runtime.event.AbstractEvent; -import org.apache.flink.runtime.io.network.api.EndOfData; -import org.apache.flink.runtime.io.network.api.EndOfPartitionEvent; -import org.apache.flink.runtime.io.network.api.serialization.EventSerializer; -import org.apache.flink.runtime.io.network.buffer.Buffer; -import org.apache.flink.runtime.io.network.buffer.Buffer.DataType; -import org.apache.flink.runtime.io.network.buffer.BufferCompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import org.apache.flink.runtime.io.network.partition.BufferAvailabilityListener; -import org.apache.flink.runtime.io.network.partition.ResultPartition; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionManager; -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; -import org.apache.flink.runtime.io.network.partition.ResultSubpartitionView; -import 
org.apache.flink.util.function.SupplierWithException; - -import org.apache.celeborn.plugin.flink.buffer.BufferWithSubpartition; -import org.apache.celeborn.plugin.flink.buffer.DataBuffer; -import org.apache.celeborn.plugin.flink.utils.BufferUtils; -import org.apache.celeborn.plugin.flink.utils.Utils; - -/** - * A {@link ResultPartition} which appends records and events to {@link DataBuffer} and after the - * {@link DataBuffer} is full, all data in the {@link DataBuffer} will be copied and spilled to the - * remote shuffle service in subpartition index order sequentially. Large records that can not be - * appended to an empty {@link DataBuffer} will be spilled directly. - */ -public class RemoteShuffleResultPartition extends ResultPartition { - - RemoteShuffleResultPartitionDelegation delegation; - - private final SupplierWithException bufferPoolFactory; - - public RemoteShuffleResultPartition( - String owningTaskName, - int partitionIndex, - ResultPartitionID partitionId, - ResultPartitionType partitionType, - int numSubpartitions, - int numTargetKeyGroups, - int networkBufferSize, - ResultPartitionManager partitionManager, - @Nullable BufferCompressor bufferCompressor, - SupplierWithException bufferPoolFactory, - RemoteShuffleOutputGate outputGate) { - - super( - owningTaskName, - partitionIndex, - partitionId, - partitionType, - numSubpartitions, - numTargetKeyGroups, - partitionManager, - bufferCompressor, - bufferPoolFactory); - - delegation = - new RemoteShuffleResultPartitionDelegation( - networkBufferSize, outputGate, this::updateStatistics, numSubpartitions); - this.bufferPoolFactory = bufferPoolFactory; - } - - @Override - public void setup() throws IOException { - // We can't call the `setup` method of the base class, otherwise it will cause a partition leak. - // The reason is that this partition will be registered to the partition manager during - // `super.setup()`. - // Since this is a cluster/remote partition(i.e. 
resources are not stored on the Flink TM), - // Flink does not trigger the resource releasing over TM. Therefore, the partition object is - // leaked. - // So we copy the logic of `setup` but don't register partition to partition manager. - checkState( - this.bufferPool == null, - "Bug in result partition setup logic: Already registered buffer pool."); - this.bufferPool = checkNotNull(bufferPoolFactory.get()); - BufferUtils.reserveNumRequiredBuffers(bufferPool, 1); - delegation.setup( - bufferPool, bufferCompressor, this::canBeCompressed, this::checkInProduceState); - } - - @Override - public void emitRecord(ByteBuffer record, int targetSubpartition) throws IOException { - delegation.emit(record, targetSubpartition, DataType.DATA_BUFFER, false); - } - - @Override - public void broadcastRecord(ByteBuffer record) throws IOException { - delegation.broadcast(record, DataType.DATA_BUFFER); - } - - @Override - public void broadcastEvent(AbstractEvent event, boolean isPriorityEvent) throws IOException { - Buffer buffer = EventSerializer.toBuffer(event, isPriorityEvent); - try { - ByteBuffer serializedEvent = buffer.getNioBufferReadable(); - delegation.broadcast(serializedEvent, buffer.getDataType()); - } finally { - buffer.recycleBuffer(); - } - } - - @Override - public void finish() throws IOException { - Utils.checkState(!isReleased(), "Result partition is already released."); - broadcastEvent(EndOfPartitionEvent.INSTANCE, false); - delegation.finish(); - super.finish(); - } - - @Override - public synchronized void close() { - delegation.close(super::close); - } - - @Override - protected void releaseInternal() { - // no-op - } - - @Override - public void flushAll() { - delegation.flushAll(); - } - - @Override - public void flush(int subpartitionIndex) { - flushAll(); - } - - @Override - public CompletableFuture getAvailableFuture() { - return AVAILABLE; - } - - @Override - public int getNumberOfQueuedBuffers() { - return 0; - } - - @Override - public int 
getNumberOfQueuedBuffers(int targetSubpartition) { - return 0; - } - - @Override - public ResultSubpartitionView createSubpartitionView( - int index, BufferAvailabilityListener availabilityListener) { - throw new UnsupportedOperationException("Not supported."); - } - - @Override - public void notifyEndOfData() throws IOException { - if (!delegation.isEndOfDataNotified()) { - broadcastEvent(EndOfData.INSTANCE, false); - delegation.setEndOfDataNotified(true); - } - } - - @Override - public CompletableFuture getAllDataProcessedFuture() { - return CompletableFuture.completedFuture(null); - } - - @Override - public String toString() { - return "ResultPartition " - + partitionId.toString() - + " [" - + partitionType - + ", " - + numSubpartitions - + " subpartitions, shuffle-descriptor: " - + delegation.getOutputGate().getShuffleDesc() - + "]"; - } - - @VisibleForTesting - public RemoteShuffleResultPartitionDelegation getDelegation() { - return delegation; - } - - public void updateStatistics(BufferWithSubpartition bufferWithSubpartition, boolean isBroadcast) { - numBuffersOut.inc(isBroadcast ? numSubpartitions : 1); - long readableBytes = - (long) bufferWithSubpartition.getBuffer().readableBytes() - BufferUtils.HEADER_LENGTH; - numBytesOut.inc(isBroadcast ? readableBytes * numSubpartitions : readableBytes); - } -} diff --git a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactory.java b/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactory.java deleted file mode 100644 index 23e4d3d6731..00000000000 --- a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactory.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import java.io.IOException; -import java.util.List; - -import org.apache.flink.runtime.io.network.buffer.BufferCompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import org.apache.flink.runtime.io.network.buffer.BufferPoolFactory; -import org.apache.flink.runtime.io.network.partition.ResultPartition; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionManager; -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; -import org.apache.flink.util.function.SupplierWithException; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.common.protocol.CompressionCodec; - -/** Factory class to create {@link RemoteShuffleResultPartition}. 
*/ -public class RemoteShuffleResultPartitionFactory - extends AbstractRemoteShuffleResultPartitionFactory { - - public RemoteShuffleResultPartitionFactory( - CelebornConf celebornConf, - ResultPartitionManager partitionManager, - BufferPoolFactory bufferPoolFactory, - int networkBufferSize) { - super(celebornConf, partitionManager, bufferPoolFactory, networkBufferSize); - } - - @Override - public ResultPartition createRemoteShuffleResultPartitionInternal( - String taskNameWithSubtaskAndId, - int partitionIndex, - ResultPartitionID id, - ResultPartitionType type, - int numSubpartitions, - int maxParallelism, - List> bufferPoolFactories, - CelebornConf celebornConf, - int numMappers, - BufferCompressor bufferCompressor, - RemoteShuffleDescriptor rsd) { - return new RemoteShuffleResultPartition( - taskNameWithSubtaskAndId, - partitionIndex, - id, - type, - numSubpartitions, - maxParallelism, - networkBufferSize, - partitionManager, - bufferCompressor, - bufferPoolFactories.get(0), - new RemoteShuffleOutputGate( - rsd, - numSubpartitions, - networkBufferSize, - bufferPoolFactories.get(1), - celebornConf, - numMappers)); - } - - protected BufferCompressor getBufferCompressor() { - return CompressionCodec.NONE.name().equals(compressionCodec) - ? null - : new BufferCompressor(networkBufferSize, compressionCodec); - } -} diff --git a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactory.java b/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactory.java deleted file mode 100644 index ee55098940b..00000000000 --- a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactory.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import org.apache.flink.runtime.io.network.NettyShuffleServiceFactory; -import org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter; -import org.apache.flink.runtime.io.network.partition.consumer.IndexedInputGate; -import org.apache.flink.runtime.shuffle.*; - -import org.apache.celeborn.plugin.flink.netty.NettyShuffleEnvironmentWrapper; - -public class RemoteShuffleServiceFactory extends AbstractRemoteShuffleServiceFactory - implements ShuffleServiceFactory { - - private final NettyShuffleServiceFactory nettyShuffleServiceFactory = - new NettyShuffleServiceFactory(); - - @Override - public ShuffleMaster createShuffleMaster( - ShuffleMasterContext shuffleMasterContext) { - return new RemoteShuffleMaster( - shuffleMasterContext, new SimpleResultPartitionAdapter(), nettyShuffleServiceFactory); - } - - @Override - public ShuffleEnvironment createShuffleEnvironment( - ShuffleEnvironmentContext shuffleEnvironmentContext) { - AbstractRemoteShuffleServiceParameters parameters = - initializePreCreateShuffleEnvironment(shuffleEnvironmentContext); - RemoteShuffleResultPartitionFactory resultPartitionFactory = - new RemoteShuffleResultPartitionFactory( - parameters.celebornConf, - parameters.resultPartitionManager, - parameters.networkBufferPool, - parameters.bufferSize); - RemoteShuffleInputGateFactory inputGateFactory = - new 
RemoteShuffleInputGateFactory( - parameters.celebornConf, parameters.networkBufferPool, parameters.bufferSize); - - return new RemoteShuffleEnvironment( - parameters.networkBufferPool, - parameters.resultPartitionManager, - resultPartitionFactory, - inputGateFactory, - parameters.celebornConf, - new NettyShuffleEnvironmentWrapper(nettyShuffleServiceFactory, shuffleEnvironmentContext)); - } -} diff --git a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/SimpleResultPartitionAdapter.java b/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/SimpleResultPartitionAdapter.java deleted file mode 100644 index e1be2229996..00000000000 --- a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/SimpleResultPartitionAdapter.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.celeborn.plugin.flink; - -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; - -public class SimpleResultPartitionAdapter implements ResultPartitionAdapter { - @Override - public boolean isBlockingResultPartition(ResultPartitionType partitionType) { - return partitionType.isBlocking(); - } -} diff --git a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/netty/NettyShuffleEnvironmentWrapper.java b/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/netty/NettyShuffleEnvironmentWrapper.java deleted file mode 100644 index 3706feeffb5..00000000000 --- a/client-flink/flink-1.14/src/main/java/org/apache/celeborn/plugin/flink/netty/NettyShuffleEnvironmentWrapper.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.celeborn.plugin.flink.netty; - -import org.apache.flink.runtime.io.network.NettyShuffleEnvironment; -import org.apache.flink.runtime.io.network.NettyShuffleServiceFactory; -import org.apache.flink.runtime.io.network.partition.ResultPartitionFactory; -import org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateFactory; -import org.apache.flink.runtime.shuffle.ShuffleEnvironmentContext; - -import org.apache.celeborn.reflect.DynFields; - -/** - * The wrapper of {@link NettyShuffleEnvironment} to generate {@link ResultPartitionFactory} and - * {@link SingleInputGateFactory}. - */ -public class NettyShuffleEnvironmentWrapper { - - private final NettyShuffleServiceFactory nettyShuffleServiceFactory; - private final ShuffleEnvironmentContext shuffleEnvironmentContext; - - private volatile NettyShuffleEnvironment nettyShuffleEnvironment; - private volatile ResultPartitionFactory nettyResultPartitionFactory; - private volatile SingleInputGateFactory nettyInputGateFactory; - - private static final DynFields.UnboundField - RESULT_PARTITION_FACTORY_FIELD = - DynFields.builder() - .hiddenImpl(NettyShuffleEnvironment.class, "resultPartitionFactory") - .defaultAlwaysNull() - .build(); - - private static final DynFields.UnboundField INPUT_GATE_FACTORY_FIELD = - DynFields.builder() - .hiddenImpl(NettyShuffleEnvironment.class, "singleInputGateFactory") - .defaultAlwaysNull() - .build(); - - public NettyShuffleEnvironmentWrapper( - NettyShuffleServiceFactory nettyShuffleServiceFactory, - ShuffleEnvironmentContext shuffleEnvironmentContext) { - this.nettyShuffleServiceFactory = nettyShuffleServiceFactory; - this.shuffleEnvironmentContext = shuffleEnvironmentContext; - } - - public NettyShuffleEnvironment nettyShuffleEnvironment() { - if (nettyShuffleEnvironment == null) { - synchronized (this) { - if (nettyShuffleEnvironment == null) { - nettyShuffleEnvironment = - 
nettyShuffleServiceFactory.createShuffleEnvironment(shuffleEnvironmentContext); - } - } - } - return nettyShuffleEnvironment; - } - - public ResultPartitionFactory nettyResultPartitionFactory() { - if (nettyResultPartitionFactory == null) { - synchronized (this) { - if (nettyResultPartitionFactory == null) { - nettyResultPartitionFactory = - RESULT_PARTITION_FACTORY_FIELD.bind(nettyShuffleEnvironment()).get(); - } - } - } - return nettyResultPartitionFactory; - } - - public SingleInputGateFactory nettyInputGateFactory() { - if (nettyInputGateFactory == null) { - synchronized (this) { - if (nettyInputGateFactory == null) { - nettyInputGateFactory = INPUT_GATE_FACTORY_FIELD.bind(nettyShuffleEnvironment()).get(); - } - } - } - return nettyInputGateFactory; - } -} diff --git a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleMasterSuiteJ.java b/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleMasterSuiteJ.java deleted file mode 100644 index 2d80815b5a3..00000000000 --- a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleMasterSuiteJ.java +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.celeborn.plugin.flink; - -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.Random; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; - -import org.apache.flink.api.common.BatchShuffleMode; -import org.apache.flink.api.common.JobID; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.configuration.ExecutionOptions; -import org.apache.flink.configuration.MemorySize; -import org.apache.flink.runtime.clusterframework.types.ResourceID; -import org.apache.flink.runtime.executiongraph.ExecutionAttemptID; -import org.apache.flink.runtime.io.network.NettyShuffleServiceFactory; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; -import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; -import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID; -import org.apache.flink.runtime.shuffle.*; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.common.protocol.FallbackPolicy; -import org.apache.celeborn.common.util.Utils$; -import org.apache.celeborn.plugin.flink.fallback.ForceFallbackPolicy; -import org.apache.celeborn.plugin.flink.utils.FlinkUtils; - -public class RemoteShuffleMasterSuiteJ { - - private static final Logger LOG = LoggerFactory.getLogger(RemoteShuffleMasterSuiteJ.class); - private RemoteShuffleMaster remoteShuffleMaster; - private Configuration configuration; - - @Before - public void setUp() { - configuration = new Configuration(); - int startPort = Utils$.MODULE$.selectRandomInt(1024, 65535); - configuration.setInteger("celeborn.master.port", 
startPort); - configuration.setString("celeborn.master.endpoints", "localhost:" + startPort); - configuration.setString("celeborn.client.application.heartbeatInterval", "30s"); - remoteShuffleMaster = createShuffleMaster(configuration); - } - - @Test - public void testRegisterJob() { - JobShuffleContext jobShuffleContext = createJobShuffleContext(JobID.generate()); - remoteShuffleMaster.registerJob(jobShuffleContext); - - // reRunRegister job - try { - remoteShuffleMaster.registerJob(jobShuffleContext); - } catch (Exception e) { - Assert.assertTrue(true); - } - - // unRegister job - remoteShuffleMaster.unregisterJob(jobShuffleContext.getJobId()); - remoteShuffleMaster.registerJob(jobShuffleContext); - } - - @Test - public void testRegisterJobWithForceFallbackPolicy() { - configuration.setString( - CelebornConf.FLINK_SHUFFLE_FALLBACK_POLICY().key(), FallbackPolicy.ALWAYS.name()); - remoteShuffleMaster = createShuffleMaster(configuration, new NettyShuffleServiceFactory()); - JobID jobID = JobID.generate(); - JobShuffleContext jobShuffleContext = createJobShuffleContext(jobID); - remoteShuffleMaster.registerJob(jobShuffleContext); - Assert.assertTrue(remoteShuffleMaster.jobFallbackPolicies().containsKey(jobID)); - remoteShuffleMaster.unregisterJob(jobShuffleContext.getJobId()); - Assert.assertTrue(remoteShuffleMaster.jobFallbackPolicies().isEmpty()); - } - - @Test - public void testRegisterPartitionWithProducer() - throws UnknownHostException, ExecutionException, InterruptedException { - JobID jobID = JobID.generate(); - JobShuffleContext jobShuffleContext = createJobShuffleContext(jobID); - remoteShuffleMaster.registerJob(jobShuffleContext); - - IntermediateDataSetID intermediateDataSetID = new IntermediateDataSetID(); - PartitionDescriptor partitionDescriptor = createPartitionDescriptor(intermediateDataSetID, 0); - ProducerDescriptor producerDescriptor = createProducerDescriptor(); - RemoteShuffleDescriptor remoteShuffleDescriptor = - (RemoteShuffleDescriptor) - 
remoteShuffleMaster - .registerPartitionWithProducer(jobID, partitionDescriptor, producerDescriptor) - .get(); - Assert.assertEquals(1, remoteShuffleMaster.lifecycleManager().shuffleCount().sum()); - ShuffleResource shuffleResource = remoteShuffleDescriptor.getShuffleResource(); - ShuffleResourceDescriptor mapPartitionShuffleDescriptor = - shuffleResource.getMapPartitionShuffleDescriptor(); - - LOG.info("remoteShuffleDescriptor:{}", remoteShuffleDescriptor); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getShuffleId()); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getPartitionId()); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getAttemptId()); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getMapId()); - - // use same dataset id - partitionDescriptor = createPartitionDescriptor(intermediateDataSetID, 1); - remoteShuffleDescriptor = - (RemoteShuffleDescriptor) - remoteShuffleMaster - .registerPartitionWithProducer(jobID, partitionDescriptor, producerDescriptor) - .get(); - Assert.assertEquals(2, remoteShuffleMaster.lifecycleManager().shuffleCount().sum()); - mapPartitionShuffleDescriptor = - remoteShuffleDescriptor.getShuffleResource().getMapPartitionShuffleDescriptor(); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getShuffleId()); - Assert.assertEquals(1, mapPartitionShuffleDescriptor.getMapId()); - - // use another attemptId - producerDescriptor = createProducerDescriptor(); - remoteShuffleDescriptor = - (RemoteShuffleDescriptor) - remoteShuffleMaster - .registerPartitionWithProducer(jobID, partitionDescriptor, producerDescriptor) - .get(); - Assert.assertEquals(3, remoteShuffleMaster.lifecycleManager().shuffleCount().sum()); - mapPartitionShuffleDescriptor = - remoteShuffleDescriptor.getShuffleResource().getMapPartitionShuffleDescriptor(); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getShuffleId()); - Assert.assertEquals(1, mapPartitionShuffleDescriptor.getAttemptId()); - Assert.assertEquals(1, 
mapPartitionShuffleDescriptor.getMapId()); - } - - @Test - public void testRegisterPartitionWithProducerForForceFallbackPolicy() - throws UnknownHostException, ExecutionException, InterruptedException { - configuration.setString( - CelebornConf.FLINK_SHUFFLE_FALLBACK_POLICY().key(), FallbackPolicy.ALWAYS.name()); - remoteShuffleMaster = createShuffleMaster(configuration, new NettyShuffleServiceFactory()); - JobID jobID = JobID.generate(); - JobShuffleContext jobShuffleContext = createJobShuffleContext(jobID); - remoteShuffleMaster.registerJob(jobShuffleContext); - - IntermediateDataSetID intermediateDataSetID = new IntermediateDataSetID(); - PartitionDescriptor partitionDescriptor = createPartitionDescriptor(intermediateDataSetID, 0); - ProducerDescriptor producerDescriptor = createProducerDescriptor(); - ShuffleDescriptor shuffleDescriptor = - remoteShuffleMaster - .registerPartitionWithProducer(jobID, partitionDescriptor, producerDescriptor) - .get(); - Assert.assertTrue(shuffleDescriptor instanceof NettyShuffleDescriptor); - Assert.assertEquals(1, remoteShuffleMaster.lifecycleManager().shuffleCount().sum()); - Map shuffleFallbackCounts = - remoteShuffleMaster.lifecycleManager().shuffleFallbackCounts(); - Assert.assertEquals(1, shuffleFallbackCounts.size()); - Assert.assertEquals( - 1L, shuffleFallbackCounts.get(ForceFallbackPolicy.class.getName()).longValue()); - } - - @Test - public void testRegisterMultipleJobs() - throws UnknownHostException, ExecutionException, InterruptedException { - JobID jobID1 = JobID.generate(); - JobShuffleContext jobShuffleContext1 = createJobShuffleContext(jobID1); - remoteShuffleMaster.registerJob(jobShuffleContext1); - - JobID jobID2 = JobID.generate(); - JobShuffleContext jobShuffleContext2 = createJobShuffleContext(jobID2); - remoteShuffleMaster.registerJob(jobShuffleContext2); - - IntermediateDataSetID intermediateDataSetID = new IntermediateDataSetID(); - PartitionDescriptor partitionDescriptor = 
createPartitionDescriptor(intermediateDataSetID, 0); - ProducerDescriptor producerDescriptor = createProducerDescriptor(); - RemoteShuffleDescriptor remoteShuffleDescriptor1 = - (RemoteShuffleDescriptor) - remoteShuffleMaster - .registerPartitionWithProducer(jobID1, partitionDescriptor, producerDescriptor) - .get(); - - // use same datasetId but different jobId - RemoteShuffleDescriptor remoteShuffleDescriptor2 = - (RemoteShuffleDescriptor) - remoteShuffleMaster - .registerPartitionWithProducer(jobID2, partitionDescriptor, producerDescriptor) - .get(); - - Assert.assertEquals( - remoteShuffleDescriptor1 - .getShuffleResource() - .getMapPartitionShuffleDescriptor() - .getShuffleId(), - 0); - Assert.assertEquals( - remoteShuffleDescriptor2 - .getShuffleResource() - .getMapPartitionShuffleDescriptor() - .getShuffleId(), - 1); - } - - @Test - public void testShuffleMemoryAnnouncing() { - Map numberOfInputGateChannels = new HashMap<>(); - Map numbersOfResultSubpartitions = new HashMap<>(); - Map resultPartitionTypes = new HashMap<>(); - IntermediateDataSetID inputDataSetID0 = new IntermediateDataSetID(); - IntermediateDataSetID inputDataSetID1 = new IntermediateDataSetID(); - IntermediateDataSetID outputDataSetID0 = new IntermediateDataSetID(); - IntermediateDataSetID outputDataSetID1 = new IntermediateDataSetID(); - IntermediateDataSetID outputDataSetID2 = new IntermediateDataSetID(); - Random random = new Random(); - numberOfInputGateChannels.put(inputDataSetID0, random.nextInt(1000)); - numberOfInputGateChannels.put(inputDataSetID1, random.nextInt(1000)); - numbersOfResultSubpartitions.put(outputDataSetID0, random.nextInt(1000)); - numbersOfResultSubpartitions.put(outputDataSetID1, random.nextInt(1000)); - numbersOfResultSubpartitions.put(outputDataSetID2, random.nextInt(1000)); - resultPartitionTypes.put(outputDataSetID0, ResultPartitionType.BLOCKING); - resultPartitionTypes.put(outputDataSetID1, ResultPartitionType.BLOCKING); - 
resultPartitionTypes.put(outputDataSetID2, ResultPartitionType.BLOCKING); - MemorySize calculated = - remoteShuffleMaster.computeShuffleMemorySizeForTask( - TaskInputsOutputsDescriptor.from( - numberOfInputGateChannels, numbersOfResultSubpartitions, resultPartitionTypes)); - - CelebornConf conf = FlinkUtils.toCelebornConf(configuration); - - long numBytesPerGate = conf.clientFlinkMemoryPerInputGate(); - long expectedInput = 2 * numBytesPerGate; - - long numBytesPerResultPartition = conf.clientFlinkMemoryPerResultPartition(); - long expectedOutput = 3 * numBytesPerResultPartition; - MemorySize expected = new MemorySize(expectedInput + expectedOutput); - - Assert.assertEquals(expected, calculated); - } - - @Test - public void testInvalidShuffleConfig() { - Assert.assertThrows( - String.format( - "The config option %s should configure as %s", - ExecutionOptions.BATCH_SHUFFLE_MODE.key(), - BatchShuffleMode.ALL_EXCHANGES_BLOCKING.name()), - IllegalArgumentException.class, - () -> - createShuffleMaster( - new Configuration() - .set( - ExecutionOptions.BATCH_SHUFFLE_MODE, - BatchShuffleMode.ALL_EXCHANGES_PIPELINED))); - Configuration configuration = new Configuration(); - configuration.setString(CelebornConf.CLIENT_PUSH_REPLICATE_ENABLED().key(), "true"); - Assert.assertThrows( - String.format( - "Flink does not support replicate shuffle data. 
Please check the config %s.", - CelebornConf.CLIENT_PUSH_REPLICATE_ENABLED().key()), - IllegalArgumentException.class, - () -> createShuffleMaster(configuration)); - } - - @After - public void tearDown() { - if (remoteShuffleMaster != null) { - try { - remoteShuffleMaster.close(); - } catch (Exception e) { - LOG.warn(e.getMessage(), e); - } - } - } - - public RemoteShuffleMaster createShuffleMaster(Configuration configuration) { - return createShuffleMaster(configuration, null); - } - - public RemoteShuffleMaster createShuffleMaster( - Configuration configuration, NettyShuffleServiceFactory nettyShuffleServiceFactory) { - remoteShuffleMaster = - new RemoteShuffleMaster( - new ShuffleMasterContext() { - @Override - public Configuration getConfiguration() { - return configuration; - } - - @Override - public void onFatalError(Throwable throwable) { - System.exit(-1); - } - }, - new SimpleResultPartitionAdapter(), - nettyShuffleServiceFactory); - - return remoteShuffleMaster; - } - - public JobShuffleContext createJobShuffleContext(JobID jobId) { - return new JobShuffleContext() { - @Override - public org.apache.flink.api.common.JobID getJobId() { - return jobId; - } - - @Override - public CompletableFuture stopTrackingAndReleasePartitions( - Collection collection) { - return CompletableFuture.completedFuture(null); - } - }; - } - - public PartitionDescriptor createPartitionDescriptor( - IntermediateDataSetID intermediateDataSetId, int partitionNum) { - IntermediateResultPartitionID intermediateResultPartitionId = - new IntermediateResultPartitionID(intermediateDataSetId, partitionNum); - return new PartitionDescriptor( - intermediateDataSetId, - 10, - intermediateResultPartitionId, - ResultPartitionType.BLOCKING, - 5, - 1); - } - - public ProducerDescriptor createProducerDescriptor() throws UnknownHostException { - ExecutionAttemptID executionAttemptId = new ExecutionAttemptID(); - return new ProducerDescriptor( - ResourceID.generate(), executionAttemptId, 
InetAddress.getLocalHost(), 100); - } -} diff --git a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactorySuiteJ.java b/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactorySuiteJ.java deleted file mode 100644 index 677a7aabac2..00000000000 --- a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactorySuiteJ.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import static org.mockito.Mockito.mock; - -import org.apache.flink.configuration.IllegalConfigurationException; -import org.apache.flink.runtime.io.network.buffer.BufferPoolFactory; -import org.apache.flink.runtime.io.network.partition.ResultPartitionManager; -import org.junit.Assert; -import org.junit.Test; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.common.protocol.CompressionCodec; - -/** Tests for {@link RemoteShuffleResultPartitionFactory}. 
*/ -public class RemoteShuffleResultPartitionFactorySuiteJ { - - @Test - public void testGetBufferCompressor() { - CelebornConf celebornConf = new CelebornConf(); - for (CompressionCodec compressionCodec : CompressionCodec.values()) { - RemoteShuffleResultPartitionFactory partitionFactory = - new RemoteShuffleResultPartitionFactory( - celebornConf.set( - CelebornConf.SHUFFLE_COMPRESSION_CODEC().key(), compressionCodec.name()), - mock(ResultPartitionManager.class), - mock(BufferPoolFactory.class), - 1); - if (CompressionCodec.NONE.equals(compressionCodec)) { - Assert.assertNull(partitionFactory.getBufferCompressor()); - } else if (CompressionCodec.LZ4.equals(compressionCodec)) { - Assert.assertNotNull(partitionFactory.getBufferCompressor()); - } else { - Assert.assertThrows( - IllegalConfigurationException.class, partitionFactory::getBufferCompressor); - } - } - } -} diff --git a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionSuiteJ.java b/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionSuiteJ.java deleted file mode 100644 index 2147c688877..00000000000 --- a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionSuiteJ.java +++ /dev/null @@ -1,618 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.anyBoolean; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.Mockito.doNothing; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.time.Duration; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.Random; -import java.util.Set; -import java.util.stream.IntStream; - -import org.apache.flink.api.common.JobID; -import org.apache.flink.core.memory.MemorySegment; -import org.apache.flink.core.memory.MemorySegmentFactory; -import org.apache.flink.runtime.io.network.api.EndOfPartitionEvent; -import org.apache.flink.runtime.io.network.api.serialization.EventSerializer; -import org.apache.flink.runtime.io.network.buffer.Buffer; -import org.apache.flink.runtime.io.network.buffer.BufferCompressor; -import org.apache.flink.runtime.io.network.buffer.BufferDecompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import org.apache.flink.runtime.io.network.buffer.NetworkBuffer; -import org.apache.flink.runtime.io.network.buffer.NetworkBufferPool; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import 
org.apache.flink.runtime.io.network.partition.ResultPartitionManager; -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; -import org.apache.flink.util.function.SupplierWithException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.plugin.flink.buffer.BufferPacker; -import org.apache.celeborn.plugin.flink.buffer.DataBuffer; -import org.apache.celeborn.plugin.flink.readclient.FlinkShuffleClientImpl; -import org.apache.celeborn.plugin.flink.utils.BufferUtils; - -public class RemoteShuffleResultPartitionSuiteJ { - private final int networkBufferSize = 32 * 1024; - private final BufferCompressor bufferCompressor = new BufferCompressor(networkBufferSize, "lz4"); - private final RemoteShuffleOutputGate remoteShuffleOutputGate = - mock(RemoteShuffleOutputGate.class); - private final CelebornConf conf = new CelebornConf(); - BufferDecompressor bufferDecompressor = new BufferDecompressor(networkBufferSize, "LZ4"); - - private static final int totalBuffers = 1000; - - private static final int bufferSize = 1024; - - private NetworkBufferPool globalBufferPool; - - private BufferPool sortBufferPool; - - private BufferPool nettyBufferPool; - - private RemoteShuffleResultPartition partitionWriter; - - private FakedRemoteShuffleOutputGate outputGate; - - @Before - public void setup() { - globalBufferPool = new NetworkBufferPool(totalBuffers, bufferSize); - } - - @After - public void tearDown() throws Exception { - if (outputGate != null) { - outputGate.release(); - } - - if (sortBufferPool != null) { - sortBufferPool.lazyDestroy(); - } - if (nettyBufferPool != null) { - nettyBufferPool.lazyDestroy(); - } - assertEquals(totalBuffers, globalBufferPool.getNumberOfAvailableMemorySegments()); - globalBufferPool.destroy(); - } - - @Test - public void tesSimpleFlush() throws IOException, InterruptedException { - List> bufferPool = createBufferPoolFactory(); - 
RemoteShuffleResultPartition remoteShuffleResultPartition = - new RemoteShuffleResultPartition( - "test", - 0, - new ResultPartitionID(), - ResultPartitionType.BLOCKING, - 2, - 2, - 32 * 1024, - new ResultPartitionManager(), - bufferCompressor, - bufferPool.get(0), - remoteShuffleOutputGate); - remoteShuffleResultPartition.setup(); - doNothing().when(remoteShuffleOutputGate).regionStart(anyBoolean()); - doNothing().when(remoteShuffleOutputGate).regionFinish(); - when(remoteShuffleOutputGate.getBufferPool()).thenReturn(bufferPool.get(1).get()); - DataBuffer dataBuffer = remoteShuffleResultPartition.getDelegation().getUnicastDataBuffer(); - ByteBuffer byteBuffer = ByteBuffer.wrap(new byte[] {1, 2, 3}); - dataBuffer.append(byteBuffer, 0, Buffer.DataType.DATA_BUFFER); - remoteShuffleResultPartition.getDelegation().flushDataBuffer(dataBuffer, true); - } - - private List> createBufferPoolFactory() { - NetworkBufferPool networkBufferPool = - new NetworkBufferPool(256 * 8, 32 * 1024, Duration.ofSeconds(1)); - - int numBuffersPerPartition = 64 * 1024 / 32; - int numForResultPartition = numBuffersPerPartition * 7 / 8; - int numForOutputGate = numBuffersPerPartition - numForResultPartition; - - List> factories = new ArrayList<>(); - factories.add( - () -> networkBufferPool.createBufferPool(numForResultPartition, numForResultPartition)); - factories.add(() -> networkBufferPool.createBufferPool(numForOutputGate, numForOutputGate)); - return factories; - } - - @Test - public void testWriteNormalRecordWithCompressionEnabled() throws Exception { - testWriteNormalRecord(true); - } - - @Test - public void testWriteNormalRecordWithCompressionDisabled() throws Exception { - testWriteNormalRecord(false); - } - - @Test - public void testWriteLargeRecord() throws Exception { - int numSubpartitions = 2; - int numBuffers = 100; - initResultPartitionWriter(numSubpartitions, 10, 200, false, conf, 10); - - partitionWriter.setup(); - - byte[] dataWritten = new byte[bufferSize * numBuffers]; - 
Random random = new Random(); - random.nextBytes(dataWritten); - ByteBuffer recordWritten = ByteBuffer.wrap(dataWritten); - partitionWriter.emitRecord(recordWritten, 0); - assertEquals(0, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.finish(); - partitionWriter.close(); - - List receivedBuffers = outputGate.getReceivedBuffers()[0]; - - ByteBuffer recordRead = ByteBuffer.allocate(bufferSize * numBuffers); - for (Buffer buffer : receivedBuffers) { - if (buffer.isBuffer()) { - recordRead.put( - buffer.getNioBuffer( - BufferUtils.HEADER_LENGTH, buffer.readableBytes() - BufferUtils.HEADER_LENGTH)); - } - } - recordWritten.rewind(); - recordRead.flip(); - assertEquals(recordWritten, recordRead); - } - - @Test - public void testBroadcastLargeRecord() throws Exception { - int numSubpartitions = 2; - int numBuffers = 100; - initResultPartitionWriter(numSubpartitions, 10, 200, false, conf, 10); - - partitionWriter.setup(); - - byte[] dataWritten = new byte[bufferSize * numBuffers]; - Random random = new Random(); - random.nextBytes(dataWritten); - ByteBuffer recordWritten = ByteBuffer.wrap(dataWritten); - partitionWriter.broadcastRecord(recordWritten); - assertEquals(0, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.finish(); - partitionWriter.close(); - - ByteBuffer recordRead0 = ByteBuffer.allocate(bufferSize * numBuffers); - for (Buffer buffer : outputGate.getReceivedBuffers()[0]) { - if (buffer.isBuffer()) { - recordRead0.put( - buffer.getNioBuffer( - BufferUtils.HEADER_LENGTH, buffer.readableBytes() - BufferUtils.HEADER_LENGTH)); - } - } - recordWritten.rewind(); - recordRead0.flip(); - assertEquals(recordWritten, recordRead0); - - ByteBuffer recordRead1 = ByteBuffer.allocate(bufferSize * numBuffers); - for (Buffer buffer : outputGate.getReceivedBuffers()[1]) { - if (buffer.isBuffer()) { - recordRead1.put( - buffer.getNioBuffer( - BufferUtils.HEADER_LENGTH, buffer.readableBytes() - BufferUtils.HEADER_LENGTH)); - } - } - 
recordWritten.rewind(); - recordRead1.flip(); - assertEquals(recordWritten, recordRead0); - } - - @Test - public void testFlush() throws Exception { - int numSubpartitions = 10; - - initResultPartitionWriter(numSubpartitions, 10, 20, false, conf, 10); - partitionWriter.setup(); - - partitionWriter.emitRecord(ByteBuffer.allocate(bufferSize), 0); - partitionWriter.emitRecord(ByteBuffer.allocate(bufferSize), 1); - assertEquals(3, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.broadcastRecord(ByteBuffer.allocate(bufferSize)); - assertEquals(2, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.flush(0); - assertEquals(0, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.emitRecord(ByteBuffer.allocate(bufferSize), 2); - partitionWriter.emitRecord(ByteBuffer.allocate(bufferSize), 3); - assertEquals(3, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.flushAll(); - assertEquals(0, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.finish(); - partitionWriter.close(); - } - - private void testWriteNormalRecord(boolean compressionEnabled) throws Exception { - int numSubpartitions = 4; - int numRecords = 100; - Random random = new Random(); - - initResultPartitionWriter(numSubpartitions, 100, 500, compressionEnabled, conf, 10); - partitionWriter.setup(); - assertTrue(outputGate.isSetup()); - - Queue[] dataWritten = new Queue[numSubpartitions]; - IntStream.range(0, numSubpartitions).forEach(i -> dataWritten[i] = new ArrayDeque<>()); - int[] numBytesWritten = new int[numSubpartitions]; - Arrays.fill(numBytesWritten, 0); - - for (int i = 0; i < numRecords; i++) { - byte[] data = new byte[random.nextInt(2 * bufferSize) + 1]; - if (compressionEnabled) { - byte randomByte = (byte) random.nextInt(); - Arrays.fill(data, randomByte); - } else { - random.nextBytes(data); - } - ByteBuffer record = ByteBuffer.wrap(data); - boolean isBroadCast = random.nextBoolean(); - - if (isBroadCast) { - 
partitionWriter.broadcastRecord(record); - IntStream.range(0, numSubpartitions) - .forEach( - subpartition -> - recordDataWritten( - record, - Buffer.DataType.DATA_BUFFER, - subpartition, - dataWritten, - numBytesWritten)); - } else { - int subpartition = random.nextInt(numSubpartitions); - partitionWriter.emitRecord(record, subpartition); - recordDataWritten( - record, Buffer.DataType.DATA_BUFFER, subpartition, dataWritten, numBytesWritten); - } - } - - partitionWriter.finish(); - assertTrue(outputGate.isFinished()); - partitionWriter.close(); - assertTrue(outputGate.isClosed()); - - for (int subpartition = 0; subpartition < numSubpartitions; ++subpartition) { - ByteBuffer record = EventSerializer.toSerializedEvent(EndOfPartitionEvent.INSTANCE); - recordDataWritten( - record, Buffer.DataType.EVENT_BUFFER, subpartition, dataWritten, numBytesWritten); - } - - outputGate - .getFinishedRegions() - .forEach( - regionIndex -> assertTrue(outputGate.getNumBuffersByRegion().containsKey(regionIndex))); - - int[] numBytesRead = new int[numSubpartitions]; - List[] receivedBuffers = outputGate.getReceivedBuffers(); - List[] validateTarget = new List[numSubpartitions]; - Arrays.fill(numBytesRead, 0); - for (int i = 0; i < numSubpartitions; i++) { - validateTarget[i] = new ArrayList<>(); - for (Buffer buffer : receivedBuffers[i]) { - for (Buffer unpackedBuffer : BufferPacker.unpack(buffer.asByteBuf())) { - if (compressionEnabled && unpackedBuffer.isCompressed()) { - Buffer decompressedBuffer = - bufferDecompressor.decompressToIntermediateBuffer(unpackedBuffer); - ByteBuffer decompressed = decompressedBuffer.getNioBufferReadable(); - int numBytes = decompressed.remaining(); - MemorySegment segment = MemorySegmentFactory.allocateUnpooledSegment(numBytes); - segment.put(0, decompressed, numBytes); - decompressedBuffer.recycleBuffer(); - validateTarget[i].add( - new NetworkBuffer(segment, buf -> {}, unpackedBuffer.getDataType(), numBytes)); - numBytesRead[i] += numBytes; - } else { 
- numBytesRead[i] += buffer.readableBytes(); - validateTarget[i].add(buffer); - } - } - } - } - IntStream.range(0, numSubpartitions).forEach(subpartitions -> {}); - checkWriteReadResult( - numSubpartitions, numBytesWritten, numBytesWritten, dataWritten, validateTarget); - } - - private void initResultPartitionWriter( - int numSubpartitions, - int sortBufferPoolSize, - int nettyBufferPoolSize, - boolean compressionEnabled, - CelebornConf conf, - int numMappers) - throws Exception { - - sortBufferPool = globalBufferPool.createBufferPool(sortBufferPoolSize, sortBufferPoolSize); - nettyBufferPool = globalBufferPool.createBufferPool(nettyBufferPoolSize, nettyBufferPoolSize); - - outputGate = - new FakedRemoteShuffleOutputGate( - getShuffleDescriptor(), numSubpartitions, () -> nettyBufferPool, conf, numMappers); - outputGate.setup(); - - if (compressionEnabled) { - partitionWriter = - new RemoteShuffleResultPartition( - "RemoteShuffleResultPartitionWriterTest", - 0, - new ResultPartitionID(), - ResultPartitionType.BLOCKING, - numSubpartitions, - numSubpartitions, - bufferSize, - new ResultPartitionManager(), - bufferCompressor, - () -> sortBufferPool, - outputGate); - } else { - partitionWriter = - new RemoteShuffleResultPartition( - "RemoteShuffleResultPartitionWriterTest", - 0, - new ResultPartitionID(), - ResultPartitionType.BLOCKING, - numSubpartitions, - numSubpartitions, - bufferSize, - new ResultPartitionManager(), - null, - () -> sortBufferPool, - outputGate); - } - } - - private void recordDataWritten( - ByteBuffer record, - Buffer.DataType dataType, - int subpartition, - Queue[] dataWritten, - int[] numBytesWritten) { - - record.rewind(); - dataWritten[subpartition].add(new DataAndType(record, dataType)); - numBytesWritten[subpartition] += record.remaining(); - } - - private static class FakedRemoteShuffleOutputGate extends RemoteShuffleOutputGate { - - private boolean isSetup; - private boolean isFinished; - private boolean isClosed; - private final List[] 
receivedBuffers; - private final Map numBuffersByRegion; - private final Set finishedRegions; - private int currentRegionIndex; - private boolean currentIsBroadcast; - - FakedRemoteShuffleOutputGate( - RemoteShuffleDescriptor shuffleDescriptor, - int numSubpartitions, - SupplierWithException bufferPoolFactory, - CelebornConf celebornConf, - int numMappers) { - - super( - shuffleDescriptor, - numSubpartitions, - bufferSize, - bufferPoolFactory, - celebornConf, - numMappers); - isSetup = false; - isFinished = false; - isClosed = false; - numBuffersByRegion = new HashMap<>(); - finishedRegions = new HashSet<>(); - currentRegionIndex = -1; - receivedBuffers = new ArrayList[numSubpartitions]; - IntStream.range(0, numSubpartitions).forEach(i -> receivedBuffers[i] = new ArrayList<>()); - currentIsBroadcast = false; - } - - @Override - FlinkShuffleClientImpl getShuffleClient() { - FlinkShuffleClientImpl client = mock(FlinkShuffleClientImpl.class); - doNothing().when(client).cleanup(anyInt(), anyInt(), anyInt()); - return client; - } - - @Override - public void setup() throws IOException, InterruptedException { - bufferPool = bufferPoolFactory.get(); - isSetup = true; - } - - @Override - public void write(Buffer buffer, int subIdx) { - if (currentIsBroadcast) { - assertEquals(0, subIdx); - ByteBuffer byteBuffer = buffer.getNioBufferReadable(); - for (int i = 0; i < numSubs; i++) { - int numBytes = buffer.readableBytes(); - MemorySegment segment = MemorySegmentFactory.allocateUnpooledSegment(numBytes); - byteBuffer.rewind(); - segment.put(0, byteBuffer, numBytes); - receivedBuffers[i].add( - new NetworkBuffer( - segment, buf -> {}, buffer.getDataType(), buffer.isCompressed(), numBytes)); - } - buffer.recycleBuffer(); - } else { - receivedBuffers[subIdx].add(buffer); - } - if (numBuffersByRegion.containsKey(currentRegionIndex)) { - int prev = numBuffersByRegion.get(currentRegionIndex); - numBuffersByRegion.put(currentRegionIndex, prev + 1); - } else { - 
numBuffersByRegion.put(currentRegionIndex, 1); - } - } - - @Override - public void regionStart(boolean isBroadcast) { - currentIsBroadcast = isBroadcast; - currentRegionIndex++; - } - - @Override - public void regionFinish() { - if (finishedRegions.contains(currentRegionIndex)) { - throw new IllegalStateException("Unexpected region: " + currentRegionIndex); - } - finishedRegions.add(currentRegionIndex); - } - - @Override - public void finish() throws InterruptedException { - isFinished = true; - } - - @Override - public void close() { - isClosed = true; - } - - public List[] getReceivedBuffers() { - return receivedBuffers; - } - - public Map getNumBuffersByRegion() { - return numBuffersByRegion; - } - - public Set getFinishedRegions() { - return finishedRegions; - } - - public boolean isSetup() { - return isSetup; - } - - public boolean isFinished() { - return isFinished; - } - - public boolean isClosed() { - return isClosed; - } - - public void release() throws Exception { - IntStream.range(0, numSubs) - .forEach( - subpartitionIndex -> { - receivedBuffers[subpartitionIndex].forEach(Buffer::recycleBuffer); - receivedBuffers[subpartitionIndex].clear(); - }); - numBuffersByRegion.clear(); - finishedRegions.clear(); - super.close(); - } - } - - private RemoteShuffleDescriptor getShuffleDescriptor() { - Random random = new Random(); - byte[] bytes = new byte[16]; - random.nextBytes(bytes); - return new RemoteShuffleDescriptor( - new JobID(bytes).toString(), - new JobID(bytes), - new JobID(bytes).toString(), - new ResultPartitionID(), - new RemoteShuffleResource( - "1", 2, System.currentTimeMillis(), new ShuffleResourceDescriptor(1, 1, 1, 0))); - } - - /** Data written and its {@link Buffer.DataType}. 
*/ - public static class DataAndType { - private final ByteBuffer data; - private final Buffer.DataType dataType; - - DataAndType(ByteBuffer data, Buffer.DataType dataType) { - this.data = data; - this.dataType = dataType; - } - } - - public static void checkWriteReadResult( - int numSubpartitions, - int[] numBytesWritten, - int[] numBytesRead, - Queue[] dataWritten, - Collection[] buffersRead) { - for (int subpartitionIndex = 0; subpartitionIndex < numSubpartitions; ++subpartitionIndex) { - assertEquals(numBytesWritten[subpartitionIndex], numBytesRead[subpartitionIndex]); - - List eventsWritten = new ArrayList<>(); - List eventsRead = new ArrayList<>(); - - ByteBuffer subpartitionDataWritten = ByteBuffer.allocate(numBytesWritten[subpartitionIndex]); - for (DataAndType dataAndType : dataWritten[subpartitionIndex]) { - subpartitionDataWritten.put(dataAndType.data); - dataAndType.data.rewind(); - if (dataAndType.dataType.isEvent()) { - eventsWritten.add(dataAndType); - } - } - - ByteBuffer subpartitionDataRead = ByteBuffer.allocate(numBytesRead[subpartitionIndex]); - for (Buffer buffer : buffersRead[subpartitionIndex]) { - subpartitionDataRead.put(buffer.getNioBufferReadable()); - if (!buffer.isBuffer()) { - eventsRead.add(buffer); - } - } - - subpartitionDataWritten.flip(); - subpartitionDataRead.flip(); - assertEquals(subpartitionDataWritten, subpartitionDataRead); - - assertEquals(eventsWritten.size(), eventsRead.size()); - for (int i = 0; i < eventsWritten.size(); ++i) { - assertEquals(eventsWritten.get(i).dataType, eventsRead.get(i).getDataType()); - assertEquals(eventsWritten.get(i).data, eventsRead.get(i).getNioBufferReadable()); - } - } - } -} diff --git a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactorySuiteJ.java b/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactorySuiteJ.java deleted file mode 100644 index 49affdee4bd..00000000000 --- 
a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactorySuiteJ.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import org.apache.flink.configuration.Configuration; -import org.apache.flink.configuration.MemorySize; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter; -import org.apache.flink.runtime.io.network.partition.consumer.IndexedInputGate; -import org.apache.flink.runtime.shuffle.ShuffleEnvironment; -import org.apache.flink.runtime.shuffle.ShuffleEnvironmentContext; -import org.junit.Assert; -import org.junit.Test; - -public class RemoteShuffleServiceFactorySuiteJ { - @Test - public void testCreateShuffleEnvironment() { - RemoteShuffleServiceFactory remoteShuffleServiceFactory = new RemoteShuffleServiceFactory(); - ShuffleEnvironmentContext shuffleEnvironmentContext = mock(ShuffleEnvironmentContext.class); - 
when(shuffleEnvironmentContext.getConfiguration()).thenReturn(new Configuration()); - when(shuffleEnvironmentContext.getNetworkMemorySize()) - .thenReturn(new MemorySize(64 * 1024 * 1024)); - MetricGroup parentMetric = mock(MetricGroup.class); - when(shuffleEnvironmentContext.getParentMetricGroup()).thenReturn(parentMetric); - MetricGroup childGroup = mock(MetricGroup.class); - MetricGroup childChildGroup = mock(MetricGroup.class); - when(parentMetric.addGroup(anyString())).thenReturn(childGroup); - when(childGroup.addGroup(any())).thenReturn(childChildGroup); - when(childChildGroup.gauge(any(), any())).thenReturn(null); - ShuffleEnvironment shuffleEnvironment = - remoteShuffleServiceFactory.createShuffleEnvironment(shuffleEnvironmentContext); - Assert.assertEquals( - 32 * 1024, - ((RemoteShuffleEnvironment) shuffleEnvironment) - .getResultPartitionFactory() - .getNetworkBufferSize()); - } -} diff --git a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/ShuffleResourceTrackerSuiteJ.java b/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/ShuffleResourceTrackerSuiteJ.java deleted file mode 100644 index b93ae3ea152..00000000000 --- a/client-flink/flink-1.14/src/test/java/org/apache/celeborn/plugin/flink/ShuffleResourceTrackerSuiteJ.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ScheduledThreadPoolExecutor; - -import com.google.common.collect.Sets; -import org.apache.flink.api.common.JobID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.shuffle.JobShuffleContext; -import org.junit.Assert; -import org.junit.Test; -import org.mockito.Mockito; - -import org.apache.celeborn.client.LifecycleManager; -import org.apache.celeborn.client.listener.WorkersStatus; -import org.apache.celeborn.common.meta.ShufflePartitionLocationInfo; -import org.apache.celeborn.common.meta.WorkerInfo; -import org.apache.celeborn.common.protocol.PartitionLocation; -import org.apache.celeborn.common.util.JavaUtils; - -public class ShuffleResourceTrackerSuiteJ { - - @Test - public void testNotifyUnknownWorkers() { - LifecycleManager lifecycleManager = Mockito.mock(LifecycleManager.class); - ScheduledThreadPoolExecutor executor = Mockito.mock(ScheduledThreadPoolExecutor.class); - - ConcurrentHashMap map = JavaUtils.newConcurrentHashMap(); - WorkerInfo workerInfo = new WorkerInfo("mock", -1, -1, -1, -1); - map.put(workerInfo.toUniqueId(), mockShufflePartitionLocationInfo(workerInfo)); - - ConcurrentHashMap map2 = JavaUtils.newConcurrentHashMap(); - map2.put(workerInfo.toUniqueId(), mockShufflePartitionLocationInfo(workerInfo)); - - 
ConcurrentHashMap map3 = JavaUtils.newConcurrentHashMap(); - map3.put(workerInfo.toUniqueId(), mockShufflePartitionLocationInfo(workerInfo)); - - Mockito.when(lifecycleManager.workerSnapshots(Mockito.anyInt())).thenReturn(map, map2, map3); - - ShuffleResourceTracker shuffleResourceTracker = - new ShuffleResourceTracker(executor, lifecycleManager); - - JobID jobID1 = new JobID(); - shuffleResourceTracker.registerJob(createJobShuffleContext(jobID1)); - shuffleResourceTracker.addPartitionResource(jobID1, 1, 1, new ResultPartitionID()); - shuffleResourceTracker.addPartitionResource(jobID1, 1, 2, new ResultPartitionID()); - shuffleResourceTracker.addPartitionResource(jobID1, 1, 3, new ResultPartitionID()); - shuffleResourceTracker.addPartitionResource(jobID1, 2, 3, new ResultPartitionID()); - - JobID jobID2 = new JobID(); - shuffleResourceTracker.registerJob(createJobShuffleContext(jobID2)); - shuffleResourceTracker.addPartitionResource(jobID2, 3, 1, new ResultPartitionID()); - - List workerInfoList = new ArrayList<>(); - workerInfoList.add(workerInfo); - shuffleResourceTracker.notifyChangedWorkersStatus(new WorkersStatus(workerInfoList, null)); - - Assert.assertEquals( - Sets.newHashSet(3), - shuffleResourceTracker - .getJobResourceListener(jobID1) - .getResultPartitionMap() - .get(2) - .keySet()); - Assert.assertEquals( - Sets.newHashSet(3), - shuffleResourceTracker - .getJobResourceListener(jobID1) - .getResultPartitionMap() - .get(2) - .keySet()); - - Assert.assertTrue( - shuffleResourceTracker - .getJobResourceListener(jobID2) - .getResultPartitionMap() - .get(3) - .isEmpty()); - } - - public ShufflePartitionLocationInfo mockShufflePartitionLocationInfo(WorkerInfo workerInfo) { - ShufflePartitionLocationInfo shufflePartitionLocationInfo = - new ShufflePartitionLocationInfo(workerInfo); - - List primaryLocations = new ArrayList<>(); - primaryLocations.add(mockShufflePartitionLocationInfo(1)); - primaryLocations.add(mockShufflePartitionLocationInfo(2)); - - List 
replicaLocations = new ArrayList<>(); - replicaLocations.add(mockShufflePartitionLocationInfo(3)); - replicaLocations.add(mockShufflePartitionLocationInfo(4)); - - shufflePartitionLocationInfo.addPrimaryPartitions(primaryLocations); - shufflePartitionLocationInfo.addReplicaPartitions(replicaLocations); - return shufflePartitionLocationInfo; - } - - public JobShuffleContext createJobShuffleContext(JobID jobId) { - return new JobShuffleContext() { - @Override - public JobID getJobId() { - return jobId; - } - - @Override - public CompletableFuture stopTrackingAndReleasePartitions( - Collection collection) { - return CompletableFuture.completedFuture(null); - } - }; - } - - private PartitionLocation mockShufflePartitionLocationInfo(int partitionId) { - return new PartitionLocation( - partitionId, -1, "mock", -1, -1, -1, -1, PartitionLocation.Mode.PRIMARY); - } -} diff --git a/client-flink/flink-1.14/src/test/resources/log4j2-test.xml b/client-flink/flink-1.14/src/test/resources/log4j2-test.xml deleted file mode 100644 index 9adcdccfd0e..00000000000 --- a/client-flink/flink-1.14/src/test/resources/log4j2-test.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/client-flink/flink-1.15-shaded/pom.xml b/client-flink/flink-1.15-shaded/pom.xml deleted file mode 100644 index 5ef41fb5122..00000000000 --- a/client-flink/flink-1.15-shaded/pom.xml +++ /dev/null @@ -1,137 +0,0 @@ - - - - 4.0.0 - - org.apache.celeborn - celeborn-parent_${scala.binary.version} - ${project.version} - ../../pom.xml - - - celeborn-client-flink-1.15-shaded_${scala.binary.version} - jar - Celeborn Shaded Client for Flink 1.15 - - - - org.apache.celeborn - celeborn-client-flink-1.15_${scala.binary.version} - ${project.version} - - - - - - - org.apache.maven.plugins - maven-shade-plugin - - - - com.google.protobuf - ${shading.prefix}.com.google.protobuf - - - com.google.common - ${shading.prefix}.com.google.common - - - io.netty - ${shading.prefix}.io.netty - 
- - org.apache.commons - ${shading.prefix}.org.apache.commons - - - org.roaringbitmap - ${shading.prefix}.org.roaringbitmap - - - - - org.apache.celeborn:* - com.google.protobuf:protobuf-java - com.google.guava:guava - com.google.guava:failureaccess - io.netty:* - org.apache.commons:commons-lang3 - org.roaringbitmap:RoaringBitmap - - - - - *:* - - **/*.proto - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - **/log4j.properties - META-INF/LICENSE.txt - META-INF/NOTICE.txt - LICENSE.txt - NOTICE.txt - - - - - - - - - - org.apache.maven.plugins - maven-antrun-plugin - ${maven.plugin.antrun.version} - - - rename-native-library - - run - - package - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/client-flink/flink-1.15-shaded/src/main/resources/META-INF/LICENSE b/client-flink/flink-1.15-shaded/src/main/resources/META-INF/LICENSE deleted file mode 100644 index 924ef2c85f4..00000000000 --- a/client-flink/flink-1.15-shaded/src/main/resources/META-INF/LICENSE +++ /dev/null @@ -1,248 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ------------------------------------------------------------------------------------- -This project bundles the following dependencies under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0.txt): - - -Apache License 2.0 --------------------------------------- - -com.google.guava:failureaccess -com.google.guava:guava -io.netty:netty-all -io.netty:netty-buffer -io.netty:netty-codec -io.netty:netty-codec-dns -io.netty:netty-codec-haproxy -io.netty:netty-codec-http -io.netty:netty-codec-http2 -io.netty:netty-codec-memcache -io.netty:netty-codec-mqtt -io.netty:netty-codec-redis -io.netty:netty-codec-smtp -io.netty:netty-codec-socks -io.netty:netty-codec-stomp -io.netty:netty-codec-xml -io.netty:netty-common -io.netty:netty-handler -io.netty:netty-handler-proxy -io.netty:netty-resolver -io.netty:netty-resolver-dns -io.netty:netty-transport -io.netty:netty-transport-classes-epoll -io.netty:netty-transport-classes-kqueue -io.netty:netty-transport-native-epoll -io.netty:netty-transport-native-kqueue -io.netty:netty-transport-native-unix-common -io.netty:netty-transport-rxtx -io.netty:netty-transport-sctp -io.netty:netty-transport-udt -org.apache.commons:commons-lang3 -org.roaringbitmap:RoaringBitmap - - -BSD 3-clause ------------- -See licenses/LICENSE-protobuf.txt for details. 
-com.google.protobuf:protobuf-java diff --git a/client-flink/flink-1.15-shaded/src/main/resources/META-INF/NOTICE b/client-flink/flink-1.15-shaded/src/main/resources/META-INF/NOTICE deleted file mode 100644 index 43452a38afe..00000000000 --- a/client-flink/flink-1.15-shaded/src/main/resources/META-INF/NOTICE +++ /dev/null @@ -1,45 +0,0 @@ - -Apache Celeborn -Copyright 2022-2024 The Apache Software Foundation. - -This product includes software developed at -The Apache Software Foundation (https://www.apache.org/). - -Apache Spark -Copyright 2014 and onwards The Apache Software Foundation - -Apache Kyuubi -Copyright 2021-2023 The Apache Software Foundation - -Apache Iceberg -Copyright 2017-2022 The Apache Software Foundation - -Apache Parquet MR -Copyright 2014-2024 The Apache Software Foundation - -This project includes code from Kite, developed at Cloudera, Inc. with -the following copyright notice: - -| Copyright 2013 Cloudera Inc. -| -| Licensed under the Apache License, Version 2.0 (the "License"); -| you may not use this file except in compliance with the License. -| You may obtain a copy of the License at -| -| http://www.apache.org/licenses/LICENSE-2.0 -| -| Unless required by applicable law or agreed to in writing, software -| distributed under the License is distributed on an "AS IS" BASIS, -| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -| See the License for the specific language governing permissions and -| limitations under the License. 
- -Remote Shuffle Service for Flink -Copyright 2021 The Flink Remote Shuffle Project - -============================================================================= -= NOTICE file corresponding to section 4d of the Apache License Version 2.0 = -============================================================================= - -Apache Commons Lang -Copyright 2001-2021 The Apache Software Foundation diff --git a/client-flink/flink-1.15-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt b/client-flink/flink-1.15-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt deleted file mode 100644 index b4350ec83c7..00000000000 --- a/client-flink/flink-1.15-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt +++ /dev/null @@ -1,42 +0,0 @@ -This license applies to all parts of Protocol Buffers except the following: - - - Atomicops support for generic gcc, located in - src/google/protobuf/stubs/atomicops_internals_generic_gcc.h. - This file is copyrighted by Red Hat Inc. - - - Atomicops support for AIX/POWER, located in - src/google/protobuf/stubs/atomicops_internals_aix.h. - This file is copyrighted by Bloomberg Finance LP. - -Copyright 2014, Google Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Code generated by the Protocol Buffer compiler is owned by the owner -of the input file used when generating it. This code is not -standalone and requires a support library to be linked with it. This -support library is itself covered by the above license. 
\ No newline at end of file diff --git a/client-flink/flink-1.15/pom.xml b/client-flink/flink-1.15/pom.xml deleted file mode 100644 index 48d9e8d5227..00000000000 --- a/client-flink/flink-1.15/pom.xml +++ /dev/null @@ -1,75 +0,0 @@ - - - - 4.0.0 - - org.apache.celeborn - celeborn-parent_${scala.binary.version} - ${project.version} - ../../pom.xml - - - celeborn-client-flink-1.15_${scala.binary.version} - jar - Celeborn Client for Flink 1.15 - - - - org.apache.celeborn - celeborn-common_${scala.binary.version} - ${project.version} - - - org.apache.celeborn - celeborn-client_${scala.binary.version} - ${project.version} - - - org.apache.celeborn - celeborn-client-flink-common_${scala.binary.version} - ${project.version} - - - org.apache.flink - flink-runtime - provided - - - org.xerial.snappy - snappy-java - - - - - - org.mockito - mockito-core - test - - - org.apache.logging.log4j - log4j-slf4j-impl - test - - - org.apache.logging.log4j - log4j-1.2-api - test - - - diff --git a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleEnvironment.java b/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleEnvironment.java deleted file mode 100644 index 844fd276a99..00000000000 --- a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleEnvironment.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import java.util.Collection; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Collectors; - -import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor; -import org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor; -import org.apache.flink.runtime.io.network.NettyShuffleEnvironment; -import org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter; -import org.apache.flink.runtime.io.network.buffer.NetworkBufferPool; -import org.apache.flink.runtime.io.network.metrics.InputChannelMetrics; -import org.apache.flink.runtime.io.network.partition.PartitionProducerStateProvider; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionManager; -import org.apache.flink.runtime.io.network.partition.consumer.IndexedInputGate; -import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; -import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID; -import org.apache.flink.runtime.shuffle.ShuffleEnvironment; -import org.apache.flink.runtime.shuffle.ShuffleIOOwnerContext; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.plugin.flink.netty.NettyShuffleEnvironmentWrapper; - -/** - * The implementation of {@link ShuffleEnvironment} based on the remote shuffle service, providing - * shuffle environment on flink TM side. 
- */ -public class RemoteShuffleEnvironment extends AbstractRemoteShuffleEnvironment - implements ShuffleEnvironment { - - /** Factory class to create {@link RemoteShuffleResultPartition}. */ - private final RemoteShuffleResultPartitionFactory resultPartitionFactory; - - private final RemoteShuffleInputGateFactory inputGateFactory; - - private final NettyShuffleEnvironmentWrapper shuffleEnvironmentWrapper; - - private final ConcurrentHashMap.KeySetView nettyResultIds = - ConcurrentHashMap.newKeySet(); - - private final ConcurrentHashMap.KeySetView - nettyResultPartitionIds = ConcurrentHashMap.newKeySet(); - - /** - * @param networkBufferPool Network buffer pool for shuffle read and shuffle write. - * @param resultPartitionManager A trivial {@link ResultPartitionManager}. - * @param resultPartitionFactory Factory class to create {@link RemoteShuffleResultPartition}. - * @param inputGateFactory Factory class to create {@link RemoteShuffleInputGate}. - * @param shuffleEnvironmentWrapper Wrapper class to create {@link NettyShuffleEnvironment}. 
- */ - public RemoteShuffleEnvironment( - NetworkBufferPool networkBufferPool, - ResultPartitionManager resultPartitionManager, - RemoteShuffleResultPartitionFactory resultPartitionFactory, - RemoteShuffleInputGateFactory inputGateFactory, - CelebornConf conf, - NettyShuffleEnvironmentWrapper shuffleEnvironmentWrapper) { - - super(networkBufferPool, resultPartitionManager, conf); - this.resultPartitionFactory = resultPartitionFactory; - this.inputGateFactory = inputGateFactory; - this.shuffleEnvironmentWrapper = shuffleEnvironmentWrapper; - } - - @Override - public ResultPartitionWriter createResultPartitionWriterInternal( - ShuffleIOOwnerContext ownerContext, - int index, - ResultPartitionDeploymentDescriptor resultPartitionDeploymentDescriptor, - CelebornConf conf) { - if (resultPartitionDeploymentDescriptor.getShuffleDescriptor() - instanceof RemoteShuffleDescriptor) { - return resultPartitionFactory.create( - ownerContext.getOwnerName(), index, resultPartitionDeploymentDescriptor, conf); - } else { - nettyResultIds.add(resultPartitionDeploymentDescriptor.getResultId()); - nettyResultPartitionIds.add(resultPartitionDeploymentDescriptor.getPartitionId()); - return shuffleEnvironmentWrapper - .nettyResultPartitionFactory() - .create(ownerContext.getOwnerName(), index, resultPartitionDeploymentDescriptor); - } - } - - @Override - IndexedInputGate createInputGateInternal( - ShuffleIOOwnerContext ownerContext, - PartitionProducerStateProvider producerStateProvider, - int gateIndex, - InputGateDeploymentDescriptor igdd) { - return nettyResultIds.contains(igdd.getConsumedResultId()) - ? 
shuffleEnvironmentWrapper - .nettyInputGateFactory() - .create( - ownerContext, - gateIndex, - igdd, - producerStateProvider, - new InputChannelMetrics( - ownerContext.getInputGroup(), ownerContext.getParentGroup())) - : inputGateFactory.create(ownerContext.getOwnerName(), gateIndex, igdd); - } - - public void releasePartitionsLocally(Collection partitionIds) { - List resultPartitionIds = - partitionIds.stream() - .filter(partitionId -> nettyResultPartitionIds.contains(partitionId.getPartitionId())) - .collect(Collectors.toList()); - if (!resultPartitionIds.isEmpty()) { - shuffleEnvironmentWrapper - .nettyShuffleEnvironment() - .releasePartitionsLocally(resultPartitionIds); - } - } - - @VisibleForTesting - RemoteShuffleResultPartitionFactory getResultPartitionFactory() { - return resultPartitionFactory; - } -} diff --git a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGate.java b/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGate.java deleted file mode 100644 index 425c3536ff7..00000000000 --- a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGate.java +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import java.io.IOException; -import java.net.InetAddress; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.CompletableFuture; - -import org.apache.flink.core.memory.MemorySegment; -import org.apache.flink.core.memory.MemorySegmentProvider; -import org.apache.flink.metrics.SimpleCounter; -import org.apache.flink.runtime.checkpoint.CheckpointOptions; -import org.apache.flink.runtime.checkpoint.channel.ChannelStateWriter; -import org.apache.flink.runtime.checkpoint.channel.InputChannelInfo; -import org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo; -import org.apache.flink.runtime.clusterframework.types.ResourceID; -import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor; -import org.apache.flink.runtime.deployment.SubpartitionIndexRange; -import org.apache.flink.runtime.event.TaskEvent; -import org.apache.flink.runtime.io.network.ConnectionID; -import org.apache.flink.runtime.io.network.LocalConnectionManager; -import org.apache.flink.runtime.io.network.api.CheckpointBarrier; -import org.apache.flink.runtime.io.network.buffer.Buffer; -import org.apache.flink.runtime.io.network.buffer.BufferDecompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; -import org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent; -import org.apache.flink.runtime.io.network.partition.consumer.IndexedInputGate; -import org.apache.flink.runtime.io.network.partition.consumer.InputChannel; -import org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel; -import 
org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate; -import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; -import org.apache.flink.runtime.taskmanager.TaskManagerLocation; -import org.apache.flink.runtime.throughput.ThroughputCalculator; -import org.apache.flink.util.CloseableIterator; -import org.apache.flink.util.FlinkRuntimeException; -import org.apache.flink.util.clock.SystemClock; -import org.apache.flink.util.function.SupplierWithException; - -import org.apache.celeborn.common.CelebornConf; - -/** A {@link IndexedInputGate} which ingest data from remote shuffle workers. */ -public class RemoteShuffleInputGate extends IndexedInputGate { - - private final RemoteShuffleInputGateDelegation inputGateDelegation; - - public RemoteShuffleInputGate( - CelebornConf celebornConf, - String taskName, - int gateIndex, - InputGateDeploymentDescriptor gateDescriptor, - SupplierWithException bufferPoolFactory, - BufferDecompressor bufferDecompressor, - int numConcurrentReading) { - - inputGateDelegation = - new RemoteShuffleInputGateDelegation( - celebornConf, - taskName, - gateIndex, - gateDescriptor, - bufferPoolFactory, - bufferDecompressor, - numConcurrentReading, - availabilityHelper, - gateDescriptor.getConsumedSubpartitionIndexRange().getStartIndex(), - gateDescriptor.getConsumedSubpartitionIndexRange().getEndIndex()); - } - - /** Setup gate and build network connections. */ - @Override - public void setup() throws IOException { - inputGateDelegation.setup(); - } - - /** Index of the gate of the corresponding computing task. */ - @Override - public int getGateIndex() { - return inputGateDelegation.getGateIndex(); - } - - /** Get number of input channels. A channel is a data flow from one shuffle worker. */ - @Override - public int getNumberOfInputChannels() { - return inputGateDelegation.getBufferReaders().size(); - } - - /** Whether reading is finished -- all channels are finished and cached buffers are drained. 
*/ - @Override - public boolean isFinished() { - return inputGateDelegation.isFinished(); - } - - @Override - public Optional getNext() { - throw new UnsupportedOperationException("Not implemented (DataSet API is not supported)."); - } - - /** Poll a received {@link BufferOrEvent}. */ - @Override - public Optional pollNext() throws IOException { - return inputGateDelegation.pollNext(); - } - - /** Close all reading channels inside this {@link RemoteShuffleInputGate}. */ - @Override - public void close() throws Exception { - inputGateDelegation.close(); - } - - /** Get {@link InputChannelInfo}s of this {@link RemoteShuffleInputGate}. */ - @Override - public List getChannelInfos() { - return inputGateDelegation.getChannelsInfo(); - } - - @Override - public void requestPartitions() { - // do-nothing - } - - @Override - public void checkpointStarted(CheckpointBarrier barrier) { - // do-nothing. - } - - @Override - public void checkpointStopped(long cancelledCheckpointId) { - // do-nothing. - } - - @Override - public void triggerDebloating() { - // do-nothing. - } - - @Override - public List getUnfinishedChannels() { - return Collections.emptyList(); - } - - @Override - public EndOfDataStatus hasReceivedEndOfData() { - if (inputGateDelegation.getPendingEndOfDataEvents() > 0) { - return EndOfDataStatus.NOT_END_OF_DATA; - } else { - // Keep compatibility with streaming mode. - return EndOfDataStatus.DRAINED; - } - } - - @Override - public void finishReadRecoveredState() { - // do-nothing. 
- } - - @Override - public InputChannel getChannel(int channelIndex) { - return new FakedRemoteInputChannel(channelIndex); - } - - @Override - public void sendTaskEvent(TaskEvent event) { - throw new FlinkRuntimeException("Method should not be called."); - } - - @Override - public void resumeConsumption(InputChannelInfo channelInfo) { - throw new FlinkRuntimeException("Method should not be called."); - } - - @Override - public void acknowledgeAllRecordsProcessed(InputChannelInfo inputChannelInfo) {} - - @Override - public CompletableFuture getStateConsumedFuture() { - return CompletableFuture.completedFuture(null); - } - - @Override - public String toString() { - return String.format( - "ReadGate [owning task: %s, gate index: %d, descriptor: %s]", - inputGateDelegation.getTaskName(), - inputGateDelegation.getGateIndex(), - inputGateDelegation.getGateDescriptor().toString()); - } - - /** Accommodation for the incompleteness of Flink pluggable shuffle service. */ - private class FakedRemoteInputChannel extends RemoteInputChannel { - FakedRemoteInputChannel(int channelIndex) { - super( - new SingleInputGate( - inputGateDelegation.getTaskName(), - inputGateDelegation.getGateIndex(), - new IntermediateDataSetID(), - ResultPartitionType.BLOCKING, - new SubpartitionIndexRange(0, 0), - 1, - (a, b, c) -> {}, - () -> null, - null, - new FakedMemorySegmentProvider(), - 0, - new ThroughputCalculator(SystemClock.getInstance()), - null), - channelIndex, - new ResultPartitionID(), - 0, - new ConnectionID( - new TaskManagerLocation(ResourceID.generate(), InetAddress.getLoopbackAddress(), 1), - 0), - new LocalConnectionManager(), - 0, - 0, - 0, - new SimpleCounter(), - new SimpleCounter(), - new FakedChannelStateWriter()); - } - } - - /** Accommodation for the incompleteness of Flink pluggable shuffle service. 
*/ - private static class FakedMemorySegmentProvider implements MemorySegmentProvider { - - @Override - public Collection requestUnpooledMemorySegments(int i) throws IOException { - return null; - } - - @Override - public void recycleUnpooledMemorySegments(Collection collection) - throws IOException {} - } - - /** Accommodation for the incompleteness of Flink pluggable shuffle service. */ - private static class FakedChannelStateWriter implements ChannelStateWriter { - - @Override - public void start(long cpId, CheckpointOptions checkpointOptions) {} - - @Override - public void addInputData( - long cpId, InputChannelInfo info, int startSeqNum, CloseableIterator data) {} - - @Override - public void addOutputData( - long cpId, ResultSubpartitionInfo info, int startSeqNum, Buffer... data) {} - - @Override - public void finishInput(long checkpointId) {} - - @Override - public void finishOutput(long checkpointId) {} - - @Override - public void abort(long checkpointId, Throwable cause, boolean cleanup) {} - - @Override - public ChannelStateWriteResult getAndRemoveWriteResult(long checkpointId) { - return null; - } - - @Override - public void close() {} - } -} diff --git a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGateFactory.java b/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGateFactory.java deleted file mode 100644 index 54737f03a2d..00000000000 --- a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleInputGateFactory.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import java.io.IOException; - -import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor; -import org.apache.flink.runtime.io.network.buffer.BufferDecompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import org.apache.flink.runtime.io.network.buffer.NetworkBufferPool; -import org.apache.flink.util.function.SupplierWithException; - -import org.apache.celeborn.common.CelebornConf; - -/** Factory class to create {@link RemoteShuffleInputGate}. */ -public class RemoteShuffleInputGateFactory extends AbstractRemoteShuffleInputGateFactory { - - public RemoteShuffleInputGateFactory( - CelebornConf conf, NetworkBufferPool networkBufferPool, int networkBufferSize) { - super(conf, networkBufferPool, networkBufferSize); - } - - @Override - protected RemoteShuffleInputGate createInputGate( - String owningTaskName, - int gateIndex, - InputGateDeploymentDescriptor igdd, - SupplierWithException bufferPoolFactory, - String compressionCodec) { - BufferDecompressor bufferDecompressor = - new BufferDecompressor(networkBufferSize, compressionCodec); - return new RemoteShuffleInputGate( - this.celebornConf, - owningTaskName, - gateIndex, - igdd, - bufferPoolFactory, - bufferDecompressor, - numConcurrentReading); - } -} diff --git a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartition.java b/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartition.java deleted file mode 100644 index 
e507526f5f6..00000000000 --- a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartition.java +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import static org.apache.celeborn.plugin.flink.utils.Utils.checkNotNull; -import static org.apache.celeborn.plugin.flink.utils.Utils.checkState; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.concurrent.CompletableFuture; - -import javax.annotation.Nullable; - -import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.runtime.event.AbstractEvent; -import org.apache.flink.runtime.io.network.api.EndOfData; -import org.apache.flink.runtime.io.network.api.EndOfPartitionEvent; -import org.apache.flink.runtime.io.network.api.StopMode; -import org.apache.flink.runtime.io.network.api.serialization.EventSerializer; -import org.apache.flink.runtime.io.network.buffer.Buffer; -import org.apache.flink.runtime.io.network.buffer.Buffer.DataType; -import org.apache.flink.runtime.io.network.buffer.BufferCompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import 
org.apache.flink.runtime.io.network.partition.*; -import org.apache.flink.util.function.SupplierWithException; - -import org.apache.celeborn.plugin.flink.buffer.BufferWithSubpartition; -import org.apache.celeborn.plugin.flink.buffer.DataBuffer; -import org.apache.celeborn.plugin.flink.utils.BufferUtils; -import org.apache.celeborn.plugin.flink.utils.Utils; - -/** - * A {@link ResultPartition} which appends records and events to {@link DataBuffer} and after the - * {@link DataBuffer} is full, all data in the {@link DataBuffer} will be copied and spilled to the - * remote shuffle service in subpartition index order sequentially. Large records that can not be - * appended to an empty {@link DataBuffer} will be spilled directly. - */ -public class RemoteShuffleResultPartition extends ResultPartition { - - private final RemoteShuffleResultPartitionDelegation delegation; - - private final SupplierWithException bufferPoolFactory; - - public RemoteShuffleResultPartition( - String owningTaskName, - int partitionIndex, - ResultPartitionID partitionId, - ResultPartitionType partitionType, - int numSubpartitions, - int numTargetKeyGroups, - int networkBufferSize, - ResultPartitionManager partitionManager, - @Nullable BufferCompressor bufferCompressor, - SupplierWithException bufferPoolFactory, - RemoteShuffleOutputGate outputGate) { - - super( - owningTaskName, - partitionIndex, - partitionId, - partitionType, - numSubpartitions, - numTargetKeyGroups, - partitionManager, - bufferCompressor, - bufferPoolFactory); - - delegation = - new RemoteShuffleResultPartitionDelegation( - networkBufferSize, outputGate, this::updateStatistics, numSubpartitions); - this.bufferPoolFactory = bufferPoolFactory; - } - - @Override - public void setup() throws IOException { - // We can't call the `setup` method of the base class, otherwise it will cause a partition leak. - // The reason is that this partition will be registered to the partition manager during - // `super.setup()`. 
- // Since this is a cluster/remote partition(i.e. resources are not stored on the Flink TM), - // Flink does not trigger the resource releasing over TM. Therefore, the partition object is - // leaked. - // So we copy the logic of `setup` but don't register partition to partition manager. - checkState( - this.bufferPool == null, - "Bug in result partition setup logic: Already registered buffer pool."); - this.bufferPool = checkNotNull(bufferPoolFactory.get()); - BufferUtils.reserveNumRequiredBuffers(bufferPool, 1); - delegation.setup( - bufferPool, bufferCompressor, this::canBeCompressed, this::checkInProduceState); - } - - @Override - public void emitRecord(ByteBuffer record, int targetSubpartition) throws IOException { - delegation.emit(record, targetSubpartition, DataType.DATA_BUFFER, false); - } - - @Override - public void broadcastRecord(ByteBuffer record) throws IOException { - delegation.broadcast(record, DataType.DATA_BUFFER); - } - - @Override - public void broadcastEvent(AbstractEvent event, boolean isPriorityEvent) throws IOException { - Buffer buffer = EventSerializer.toBuffer(event, isPriorityEvent); - try { - ByteBuffer serializedEvent = buffer.getNioBufferReadable(); - delegation.broadcast(serializedEvent, buffer.getDataType()); - } finally { - buffer.recycleBuffer(); - } - } - - @Override - public void finish() throws IOException { - Utils.checkState(!isReleased(), "Result partition is already released."); - broadcastEvent(EndOfPartitionEvent.INSTANCE, false); - delegation.finish(); - super.finish(); - } - - @Override - public synchronized void close() { - delegation.close(super::close); - } - - @Override - protected void releaseInternal() { - // no-op - } - - @Override - public void flushAll() { - delegation.flushAll(); - } - - @Override - public void flush(int subpartitionIndex) { - flushAll(); - } - - @Override - public CompletableFuture getAvailableFuture() { - return AVAILABLE; - } - - @Override - public int getNumberOfQueuedBuffers() { - 
return 0; - } - - @Override - public long getSizeOfQueuedBuffersUnsafe() { - return 0; - } - - @Override - public int getNumberOfQueuedBuffers(int targetSubpartition) { - return 0; - } - - @Override - public ResultSubpartitionView createSubpartitionView( - int index, BufferAvailabilityListener availabilityListener) { - throw new UnsupportedOperationException("Not supported."); - } - - @Override - public void notifyEndOfData(StopMode mode) throws IOException { - if (!delegation.isEndOfDataNotified()) { - broadcastEvent(new EndOfData(mode), false); - delegation.setEndOfDataNotified(true); - } - } - - @Override - public CompletableFuture getAllDataProcessedFuture() { - return CompletableFuture.completedFuture(null); - } - - @Override - public String toString() { - return "ResultPartition " - + partitionId.toString() - + " [" - + partitionType - + ", " - + numSubpartitions - + " subpartitions, shuffle-descriptor: " - + delegation.getOutputGate().getShuffleDesc() - + "]"; - } - - @VisibleForTesting - public RemoteShuffleResultPartitionDelegation getDelegation() { - return delegation; - } - - public void updateStatistics(BufferWithSubpartition bufferWithSubpartition, boolean isBroadcast) { - numBuffersOut.inc(isBroadcast ? numSubpartitions : 1); - long readableBytes = - (long) bufferWithSubpartition.getBuffer().readableBytes() - BufferUtils.HEADER_LENGTH; - numBytesProduced.inc(readableBytes); - numBytesOut.inc(isBroadcast ? 
readableBytes * numSubpartitions : readableBytes); - } -} diff --git a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactory.java b/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactory.java deleted file mode 100644 index 9078e454bee..00000000000 --- a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactory.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.celeborn.plugin.flink; - -import java.io.IOException; -import java.util.List; - -import org.apache.flink.runtime.io.network.buffer.BufferCompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import org.apache.flink.runtime.io.network.buffer.BufferPoolFactory; -import org.apache.flink.runtime.io.network.partition.ResultPartition; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionManager; -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; -import org.apache.flink.util.function.SupplierWithException; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.common.protocol.CompressionCodec; - -/** Factory class to create {@link RemoteShuffleResultPartition}. */ -public class RemoteShuffleResultPartitionFactory - extends AbstractRemoteShuffleResultPartitionFactory { - - public RemoteShuffleResultPartitionFactory( - CelebornConf celebornConf, - ResultPartitionManager partitionManager, - BufferPoolFactory bufferPoolFactory, - int networkBufferSize) { - - super(celebornConf, partitionManager, bufferPoolFactory, networkBufferSize); - } - - @Override - public ResultPartition createRemoteShuffleResultPartitionInternal( - String taskNameWithSubtaskAndId, - int partitionIndex, - ResultPartitionID id, - ResultPartitionType type, - int numSubpartitions, - int maxParallelism, - List> bufferPoolFactories, - CelebornConf celebornConf, - int numMappers, - BufferCompressor bufferCompressor, - RemoteShuffleDescriptor rsd) { - return new RemoteShuffleResultPartition( - taskNameWithSubtaskAndId, - partitionIndex, - id, - type, - numSubpartitions, - maxParallelism, - networkBufferSize, - partitionManager, - bufferCompressor, - bufferPoolFactories.get(0), - new RemoteShuffleOutputGate( - rsd, - numSubpartitions, - networkBufferSize, - bufferPoolFactories.get(1), - celebornConf, - numMappers)); - } - - 
protected BufferCompressor getBufferCompressor() { - return CompressionCodec.NONE.name().equals(compressionCodec) - ? null - : new BufferCompressor(networkBufferSize, compressionCodec); - } -} diff --git a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactory.java b/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactory.java deleted file mode 100644 index ee55098940b..00000000000 --- a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactory.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.celeborn.plugin.flink; - -import org.apache.flink.runtime.io.network.NettyShuffleServiceFactory; -import org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter; -import org.apache.flink.runtime.io.network.partition.consumer.IndexedInputGate; -import org.apache.flink.runtime.shuffle.*; - -import org.apache.celeborn.plugin.flink.netty.NettyShuffleEnvironmentWrapper; - -public class RemoteShuffleServiceFactory extends AbstractRemoteShuffleServiceFactory - implements ShuffleServiceFactory { - - private final NettyShuffleServiceFactory nettyShuffleServiceFactory = - new NettyShuffleServiceFactory(); - - @Override - public ShuffleMaster createShuffleMaster( - ShuffleMasterContext shuffleMasterContext) { - return new RemoteShuffleMaster( - shuffleMasterContext, new SimpleResultPartitionAdapter(), nettyShuffleServiceFactory); - } - - @Override - public ShuffleEnvironment createShuffleEnvironment( - ShuffleEnvironmentContext shuffleEnvironmentContext) { - AbstractRemoteShuffleServiceParameters parameters = - initializePreCreateShuffleEnvironment(shuffleEnvironmentContext); - RemoteShuffleResultPartitionFactory resultPartitionFactory = - new RemoteShuffleResultPartitionFactory( - parameters.celebornConf, - parameters.resultPartitionManager, - parameters.networkBufferPool, - parameters.bufferSize); - RemoteShuffleInputGateFactory inputGateFactory = - new RemoteShuffleInputGateFactory( - parameters.celebornConf, parameters.networkBufferPool, parameters.bufferSize); - - return new RemoteShuffleEnvironment( - parameters.networkBufferPool, - parameters.resultPartitionManager, - resultPartitionFactory, - inputGateFactory, - parameters.celebornConf, - new NettyShuffleEnvironmentWrapper(nettyShuffleServiceFactory, shuffleEnvironmentContext)); - } -} diff --git a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/SimpleResultPartitionAdapter.java 
b/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/SimpleResultPartitionAdapter.java deleted file mode 100644 index e1be2229996..00000000000 --- a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/SimpleResultPartitionAdapter.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.celeborn.plugin.flink; - -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; - -public class SimpleResultPartitionAdapter implements ResultPartitionAdapter { - @Override - public boolean isBlockingResultPartition(ResultPartitionType partitionType) { - return partitionType.isBlocking(); - } -} diff --git a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/netty/NettyShuffleEnvironmentWrapper.java b/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/netty/NettyShuffleEnvironmentWrapper.java deleted file mode 100644 index 3706feeffb5..00000000000 --- a/client-flink/flink-1.15/src/main/java/org/apache/celeborn/plugin/flink/netty/NettyShuffleEnvironmentWrapper.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.celeborn.plugin.flink.netty; - -import org.apache.flink.runtime.io.network.NettyShuffleEnvironment; -import org.apache.flink.runtime.io.network.NettyShuffleServiceFactory; -import org.apache.flink.runtime.io.network.partition.ResultPartitionFactory; -import org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateFactory; -import org.apache.flink.runtime.shuffle.ShuffleEnvironmentContext; - -import org.apache.celeborn.reflect.DynFields; - -/** - * The wrapper of {@link NettyShuffleEnvironment} to generate {@link ResultPartitionFactory} and - * {@link SingleInputGateFactory}. - */ -public class NettyShuffleEnvironmentWrapper { - - private final NettyShuffleServiceFactory nettyShuffleServiceFactory; - private final ShuffleEnvironmentContext shuffleEnvironmentContext; - - private volatile NettyShuffleEnvironment nettyShuffleEnvironment; - private volatile ResultPartitionFactory nettyResultPartitionFactory; - private volatile SingleInputGateFactory nettyInputGateFactory; - - private static final DynFields.UnboundField - RESULT_PARTITION_FACTORY_FIELD = - DynFields.builder() - .hiddenImpl(NettyShuffleEnvironment.class, "resultPartitionFactory") - .defaultAlwaysNull() - .build(); - - private static final DynFields.UnboundField INPUT_GATE_FACTORY_FIELD = - DynFields.builder() - .hiddenImpl(NettyShuffleEnvironment.class, "singleInputGateFactory") - .defaultAlwaysNull() - .build(); - - public NettyShuffleEnvironmentWrapper( - NettyShuffleServiceFactory nettyShuffleServiceFactory, - ShuffleEnvironmentContext shuffleEnvironmentContext) { - this.nettyShuffleServiceFactory = nettyShuffleServiceFactory; - this.shuffleEnvironmentContext = shuffleEnvironmentContext; - } - - public NettyShuffleEnvironment nettyShuffleEnvironment() { - if (nettyShuffleEnvironment == null) { - synchronized (this) { - if (nettyShuffleEnvironment == null) { - nettyShuffleEnvironment = - 
nettyShuffleServiceFactory.createShuffleEnvironment(shuffleEnvironmentContext); - } - } - } - return nettyShuffleEnvironment; - } - - public ResultPartitionFactory nettyResultPartitionFactory() { - if (nettyResultPartitionFactory == null) { - synchronized (this) { - if (nettyResultPartitionFactory == null) { - nettyResultPartitionFactory = - RESULT_PARTITION_FACTORY_FIELD.bind(nettyShuffleEnvironment()).get(); - } - } - } - return nettyResultPartitionFactory; - } - - public SingleInputGateFactory nettyInputGateFactory() { - if (nettyInputGateFactory == null) { - synchronized (this) { - if (nettyInputGateFactory == null) { - nettyInputGateFactory = INPUT_GATE_FACTORY_FIELD.bind(nettyShuffleEnvironment()).get(); - } - } - } - return nettyInputGateFactory; - } -} diff --git a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleMasterSuiteJ.java b/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleMasterSuiteJ.java deleted file mode 100644 index 2d80815b5a3..00000000000 --- a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleMasterSuiteJ.java +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.celeborn.plugin.flink; - -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.Random; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; - -import org.apache.flink.api.common.BatchShuffleMode; -import org.apache.flink.api.common.JobID; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.configuration.ExecutionOptions; -import org.apache.flink.configuration.MemorySize; -import org.apache.flink.runtime.clusterframework.types.ResourceID; -import org.apache.flink.runtime.executiongraph.ExecutionAttemptID; -import org.apache.flink.runtime.io.network.NettyShuffleServiceFactory; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; -import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; -import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID; -import org.apache.flink.runtime.shuffle.*; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.common.protocol.FallbackPolicy; -import org.apache.celeborn.common.util.Utils$; -import org.apache.celeborn.plugin.flink.fallback.ForceFallbackPolicy; -import org.apache.celeborn.plugin.flink.utils.FlinkUtils; - -public class RemoteShuffleMasterSuiteJ { - - private static final Logger LOG = LoggerFactory.getLogger(RemoteShuffleMasterSuiteJ.class); - private RemoteShuffleMaster remoteShuffleMaster; - private Configuration configuration; - - @Before - public void setUp() { - configuration = new Configuration(); - int startPort = Utils$.MODULE$.selectRandomInt(1024, 65535); - configuration.setInteger("celeborn.master.port", 
startPort); - configuration.setString("celeborn.master.endpoints", "localhost:" + startPort); - configuration.setString("celeborn.client.application.heartbeatInterval", "30s"); - remoteShuffleMaster = createShuffleMaster(configuration); - } - - @Test - public void testRegisterJob() { - JobShuffleContext jobShuffleContext = createJobShuffleContext(JobID.generate()); - remoteShuffleMaster.registerJob(jobShuffleContext); - - // reRunRegister job - try { - remoteShuffleMaster.registerJob(jobShuffleContext); - } catch (Exception e) { - Assert.assertTrue(true); - } - - // unRegister job - remoteShuffleMaster.unregisterJob(jobShuffleContext.getJobId()); - remoteShuffleMaster.registerJob(jobShuffleContext); - } - - @Test - public void testRegisterJobWithForceFallbackPolicy() { - configuration.setString( - CelebornConf.FLINK_SHUFFLE_FALLBACK_POLICY().key(), FallbackPolicy.ALWAYS.name()); - remoteShuffleMaster = createShuffleMaster(configuration, new NettyShuffleServiceFactory()); - JobID jobID = JobID.generate(); - JobShuffleContext jobShuffleContext = createJobShuffleContext(jobID); - remoteShuffleMaster.registerJob(jobShuffleContext); - Assert.assertTrue(remoteShuffleMaster.jobFallbackPolicies().containsKey(jobID)); - remoteShuffleMaster.unregisterJob(jobShuffleContext.getJobId()); - Assert.assertTrue(remoteShuffleMaster.jobFallbackPolicies().isEmpty()); - } - - @Test - public void testRegisterPartitionWithProducer() - throws UnknownHostException, ExecutionException, InterruptedException { - JobID jobID = JobID.generate(); - JobShuffleContext jobShuffleContext = createJobShuffleContext(jobID); - remoteShuffleMaster.registerJob(jobShuffleContext); - - IntermediateDataSetID intermediateDataSetID = new IntermediateDataSetID(); - PartitionDescriptor partitionDescriptor = createPartitionDescriptor(intermediateDataSetID, 0); - ProducerDescriptor producerDescriptor = createProducerDescriptor(); - RemoteShuffleDescriptor remoteShuffleDescriptor = - (RemoteShuffleDescriptor) - 
remoteShuffleMaster - .registerPartitionWithProducer(jobID, partitionDescriptor, producerDescriptor) - .get(); - Assert.assertEquals(1, remoteShuffleMaster.lifecycleManager().shuffleCount().sum()); - ShuffleResource shuffleResource = remoteShuffleDescriptor.getShuffleResource(); - ShuffleResourceDescriptor mapPartitionShuffleDescriptor = - shuffleResource.getMapPartitionShuffleDescriptor(); - - LOG.info("remoteShuffleDescriptor:{}", remoteShuffleDescriptor); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getShuffleId()); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getPartitionId()); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getAttemptId()); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getMapId()); - - // use same dataset id - partitionDescriptor = createPartitionDescriptor(intermediateDataSetID, 1); - remoteShuffleDescriptor = - (RemoteShuffleDescriptor) - remoteShuffleMaster - .registerPartitionWithProducer(jobID, partitionDescriptor, producerDescriptor) - .get(); - Assert.assertEquals(2, remoteShuffleMaster.lifecycleManager().shuffleCount().sum()); - mapPartitionShuffleDescriptor = - remoteShuffleDescriptor.getShuffleResource().getMapPartitionShuffleDescriptor(); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getShuffleId()); - Assert.assertEquals(1, mapPartitionShuffleDescriptor.getMapId()); - - // use another attemptId - producerDescriptor = createProducerDescriptor(); - remoteShuffleDescriptor = - (RemoteShuffleDescriptor) - remoteShuffleMaster - .registerPartitionWithProducer(jobID, partitionDescriptor, producerDescriptor) - .get(); - Assert.assertEquals(3, remoteShuffleMaster.lifecycleManager().shuffleCount().sum()); - mapPartitionShuffleDescriptor = - remoteShuffleDescriptor.getShuffleResource().getMapPartitionShuffleDescriptor(); - Assert.assertEquals(0, mapPartitionShuffleDescriptor.getShuffleId()); - Assert.assertEquals(1, mapPartitionShuffleDescriptor.getAttemptId()); - Assert.assertEquals(1, 
mapPartitionShuffleDescriptor.getMapId()); - } - - @Test - public void testRegisterPartitionWithProducerForForceFallbackPolicy() - throws UnknownHostException, ExecutionException, InterruptedException { - configuration.setString( - CelebornConf.FLINK_SHUFFLE_FALLBACK_POLICY().key(), FallbackPolicy.ALWAYS.name()); - remoteShuffleMaster = createShuffleMaster(configuration, new NettyShuffleServiceFactory()); - JobID jobID = JobID.generate(); - JobShuffleContext jobShuffleContext = createJobShuffleContext(jobID); - remoteShuffleMaster.registerJob(jobShuffleContext); - - IntermediateDataSetID intermediateDataSetID = new IntermediateDataSetID(); - PartitionDescriptor partitionDescriptor = createPartitionDescriptor(intermediateDataSetID, 0); - ProducerDescriptor producerDescriptor = createProducerDescriptor(); - ShuffleDescriptor shuffleDescriptor = - remoteShuffleMaster - .registerPartitionWithProducer(jobID, partitionDescriptor, producerDescriptor) - .get(); - Assert.assertTrue(shuffleDescriptor instanceof NettyShuffleDescriptor); - Assert.assertEquals(1, remoteShuffleMaster.lifecycleManager().shuffleCount().sum()); - Map shuffleFallbackCounts = - remoteShuffleMaster.lifecycleManager().shuffleFallbackCounts(); - Assert.assertEquals(1, shuffleFallbackCounts.size()); - Assert.assertEquals( - 1L, shuffleFallbackCounts.get(ForceFallbackPolicy.class.getName()).longValue()); - } - - @Test - public void testRegisterMultipleJobs() - throws UnknownHostException, ExecutionException, InterruptedException { - JobID jobID1 = JobID.generate(); - JobShuffleContext jobShuffleContext1 = createJobShuffleContext(jobID1); - remoteShuffleMaster.registerJob(jobShuffleContext1); - - JobID jobID2 = JobID.generate(); - JobShuffleContext jobShuffleContext2 = createJobShuffleContext(jobID2); - remoteShuffleMaster.registerJob(jobShuffleContext2); - - IntermediateDataSetID intermediateDataSetID = new IntermediateDataSetID(); - PartitionDescriptor partitionDescriptor = 
createPartitionDescriptor(intermediateDataSetID, 0); - ProducerDescriptor producerDescriptor = createProducerDescriptor(); - RemoteShuffleDescriptor remoteShuffleDescriptor1 = - (RemoteShuffleDescriptor) - remoteShuffleMaster - .registerPartitionWithProducer(jobID1, partitionDescriptor, producerDescriptor) - .get(); - - // use same datasetId but different jobId - RemoteShuffleDescriptor remoteShuffleDescriptor2 = - (RemoteShuffleDescriptor) - remoteShuffleMaster - .registerPartitionWithProducer(jobID2, partitionDescriptor, producerDescriptor) - .get(); - - Assert.assertEquals( - remoteShuffleDescriptor1 - .getShuffleResource() - .getMapPartitionShuffleDescriptor() - .getShuffleId(), - 0); - Assert.assertEquals( - remoteShuffleDescriptor2 - .getShuffleResource() - .getMapPartitionShuffleDescriptor() - .getShuffleId(), - 1); - } - - @Test - public void testShuffleMemoryAnnouncing() { - Map numberOfInputGateChannels = new HashMap<>(); - Map numbersOfResultSubpartitions = new HashMap<>(); - Map resultPartitionTypes = new HashMap<>(); - IntermediateDataSetID inputDataSetID0 = new IntermediateDataSetID(); - IntermediateDataSetID inputDataSetID1 = new IntermediateDataSetID(); - IntermediateDataSetID outputDataSetID0 = new IntermediateDataSetID(); - IntermediateDataSetID outputDataSetID1 = new IntermediateDataSetID(); - IntermediateDataSetID outputDataSetID2 = new IntermediateDataSetID(); - Random random = new Random(); - numberOfInputGateChannels.put(inputDataSetID0, random.nextInt(1000)); - numberOfInputGateChannels.put(inputDataSetID1, random.nextInt(1000)); - numbersOfResultSubpartitions.put(outputDataSetID0, random.nextInt(1000)); - numbersOfResultSubpartitions.put(outputDataSetID1, random.nextInt(1000)); - numbersOfResultSubpartitions.put(outputDataSetID2, random.nextInt(1000)); - resultPartitionTypes.put(outputDataSetID0, ResultPartitionType.BLOCKING); - resultPartitionTypes.put(outputDataSetID1, ResultPartitionType.BLOCKING); - 
resultPartitionTypes.put(outputDataSetID2, ResultPartitionType.BLOCKING); - MemorySize calculated = - remoteShuffleMaster.computeShuffleMemorySizeForTask( - TaskInputsOutputsDescriptor.from( - numberOfInputGateChannels, numbersOfResultSubpartitions, resultPartitionTypes)); - - CelebornConf conf = FlinkUtils.toCelebornConf(configuration); - - long numBytesPerGate = conf.clientFlinkMemoryPerInputGate(); - long expectedInput = 2 * numBytesPerGate; - - long numBytesPerResultPartition = conf.clientFlinkMemoryPerResultPartition(); - long expectedOutput = 3 * numBytesPerResultPartition; - MemorySize expected = new MemorySize(expectedInput + expectedOutput); - - Assert.assertEquals(expected, calculated); - } - - @Test - public void testInvalidShuffleConfig() { - Assert.assertThrows( - String.format( - "The config option %s should configure as %s", - ExecutionOptions.BATCH_SHUFFLE_MODE.key(), - BatchShuffleMode.ALL_EXCHANGES_BLOCKING.name()), - IllegalArgumentException.class, - () -> - createShuffleMaster( - new Configuration() - .set( - ExecutionOptions.BATCH_SHUFFLE_MODE, - BatchShuffleMode.ALL_EXCHANGES_PIPELINED))); - Configuration configuration = new Configuration(); - configuration.setString(CelebornConf.CLIENT_PUSH_REPLICATE_ENABLED().key(), "true"); - Assert.assertThrows( - String.format( - "Flink does not support replicate shuffle data. 
Please check the config %s.", - CelebornConf.CLIENT_PUSH_REPLICATE_ENABLED().key()), - IllegalArgumentException.class, - () -> createShuffleMaster(configuration)); - } - - @After - public void tearDown() { - if (remoteShuffleMaster != null) { - try { - remoteShuffleMaster.close(); - } catch (Exception e) { - LOG.warn(e.getMessage(), e); - } - } - } - - public RemoteShuffleMaster createShuffleMaster(Configuration configuration) { - return createShuffleMaster(configuration, null); - } - - public RemoteShuffleMaster createShuffleMaster( - Configuration configuration, NettyShuffleServiceFactory nettyShuffleServiceFactory) { - remoteShuffleMaster = - new RemoteShuffleMaster( - new ShuffleMasterContext() { - @Override - public Configuration getConfiguration() { - return configuration; - } - - @Override - public void onFatalError(Throwable throwable) { - System.exit(-1); - } - }, - new SimpleResultPartitionAdapter(), - nettyShuffleServiceFactory); - - return remoteShuffleMaster; - } - - public JobShuffleContext createJobShuffleContext(JobID jobId) { - return new JobShuffleContext() { - @Override - public org.apache.flink.api.common.JobID getJobId() { - return jobId; - } - - @Override - public CompletableFuture stopTrackingAndReleasePartitions( - Collection collection) { - return CompletableFuture.completedFuture(null); - } - }; - } - - public PartitionDescriptor createPartitionDescriptor( - IntermediateDataSetID intermediateDataSetId, int partitionNum) { - IntermediateResultPartitionID intermediateResultPartitionId = - new IntermediateResultPartitionID(intermediateDataSetId, partitionNum); - return new PartitionDescriptor( - intermediateDataSetId, - 10, - intermediateResultPartitionId, - ResultPartitionType.BLOCKING, - 5, - 1); - } - - public ProducerDescriptor createProducerDescriptor() throws UnknownHostException { - ExecutionAttemptID executionAttemptId = new ExecutionAttemptID(); - return new ProducerDescriptor( - ResourceID.generate(), executionAttemptId, 
InetAddress.getLocalHost(), 100); - } -} diff --git a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactorySuiteJ.java b/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactorySuiteJ.java deleted file mode 100644 index 677a7aabac2..00000000000 --- a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionFactorySuiteJ.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import static org.mockito.Mockito.mock; - -import org.apache.flink.configuration.IllegalConfigurationException; -import org.apache.flink.runtime.io.network.buffer.BufferPoolFactory; -import org.apache.flink.runtime.io.network.partition.ResultPartitionManager; -import org.junit.Assert; -import org.junit.Test; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.common.protocol.CompressionCodec; - -/** Tests for {@link RemoteShuffleResultPartitionFactory}. 
*/ -public class RemoteShuffleResultPartitionFactorySuiteJ { - - @Test - public void testGetBufferCompressor() { - CelebornConf celebornConf = new CelebornConf(); - for (CompressionCodec compressionCodec : CompressionCodec.values()) { - RemoteShuffleResultPartitionFactory partitionFactory = - new RemoteShuffleResultPartitionFactory( - celebornConf.set( - CelebornConf.SHUFFLE_COMPRESSION_CODEC().key(), compressionCodec.name()), - mock(ResultPartitionManager.class), - mock(BufferPoolFactory.class), - 1); - if (CompressionCodec.NONE.equals(compressionCodec)) { - Assert.assertNull(partitionFactory.getBufferCompressor()); - } else if (CompressionCodec.LZ4.equals(compressionCodec)) { - Assert.assertNotNull(partitionFactory.getBufferCompressor()); - } else { - Assert.assertThrows( - IllegalConfigurationException.class, partitionFactory::getBufferCompressor); - } - } - } -} diff --git a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionSuiteJ.java b/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionSuiteJ.java deleted file mode 100644 index 2147c688877..00000000000 --- a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleResultPartitionSuiteJ.java +++ /dev/null @@ -1,618 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.anyBoolean; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.Mockito.doNothing; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.time.Duration; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.Random; -import java.util.Set; -import java.util.stream.IntStream; - -import org.apache.flink.api.common.JobID; -import org.apache.flink.core.memory.MemorySegment; -import org.apache.flink.core.memory.MemorySegmentFactory; -import org.apache.flink.runtime.io.network.api.EndOfPartitionEvent; -import org.apache.flink.runtime.io.network.api.serialization.EventSerializer; -import org.apache.flink.runtime.io.network.buffer.Buffer; -import org.apache.flink.runtime.io.network.buffer.BufferCompressor; -import org.apache.flink.runtime.io.network.buffer.BufferDecompressor; -import org.apache.flink.runtime.io.network.buffer.BufferPool; -import org.apache.flink.runtime.io.network.buffer.NetworkBuffer; -import org.apache.flink.runtime.io.network.buffer.NetworkBufferPool; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import 
org.apache.flink.runtime.io.network.partition.ResultPartitionManager; -import org.apache.flink.runtime.io.network.partition.ResultPartitionType; -import org.apache.flink.util.function.SupplierWithException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import org.apache.celeborn.common.CelebornConf; -import org.apache.celeborn.plugin.flink.buffer.BufferPacker; -import org.apache.celeborn.plugin.flink.buffer.DataBuffer; -import org.apache.celeborn.plugin.flink.readclient.FlinkShuffleClientImpl; -import org.apache.celeborn.plugin.flink.utils.BufferUtils; - -public class RemoteShuffleResultPartitionSuiteJ { - private final int networkBufferSize = 32 * 1024; - private final BufferCompressor bufferCompressor = new BufferCompressor(networkBufferSize, "lz4"); - private final RemoteShuffleOutputGate remoteShuffleOutputGate = - mock(RemoteShuffleOutputGate.class); - private final CelebornConf conf = new CelebornConf(); - BufferDecompressor bufferDecompressor = new BufferDecompressor(networkBufferSize, "LZ4"); - - private static final int totalBuffers = 1000; - - private static final int bufferSize = 1024; - - private NetworkBufferPool globalBufferPool; - - private BufferPool sortBufferPool; - - private BufferPool nettyBufferPool; - - private RemoteShuffleResultPartition partitionWriter; - - private FakedRemoteShuffleOutputGate outputGate; - - @Before - public void setup() { - globalBufferPool = new NetworkBufferPool(totalBuffers, bufferSize); - } - - @After - public void tearDown() throws Exception { - if (outputGate != null) { - outputGate.release(); - } - - if (sortBufferPool != null) { - sortBufferPool.lazyDestroy(); - } - if (nettyBufferPool != null) { - nettyBufferPool.lazyDestroy(); - } - assertEquals(totalBuffers, globalBufferPool.getNumberOfAvailableMemorySegments()); - globalBufferPool.destroy(); - } - - @Test - public void tesSimpleFlush() throws IOException, InterruptedException { - List> bufferPool = createBufferPoolFactory(); - 
RemoteShuffleResultPartition remoteShuffleResultPartition = - new RemoteShuffleResultPartition( - "test", - 0, - new ResultPartitionID(), - ResultPartitionType.BLOCKING, - 2, - 2, - 32 * 1024, - new ResultPartitionManager(), - bufferCompressor, - bufferPool.get(0), - remoteShuffleOutputGate); - remoteShuffleResultPartition.setup(); - doNothing().when(remoteShuffleOutputGate).regionStart(anyBoolean()); - doNothing().when(remoteShuffleOutputGate).regionFinish(); - when(remoteShuffleOutputGate.getBufferPool()).thenReturn(bufferPool.get(1).get()); - DataBuffer dataBuffer = remoteShuffleResultPartition.getDelegation().getUnicastDataBuffer(); - ByteBuffer byteBuffer = ByteBuffer.wrap(new byte[] {1, 2, 3}); - dataBuffer.append(byteBuffer, 0, Buffer.DataType.DATA_BUFFER); - remoteShuffleResultPartition.getDelegation().flushDataBuffer(dataBuffer, true); - } - - private List> createBufferPoolFactory() { - NetworkBufferPool networkBufferPool = - new NetworkBufferPool(256 * 8, 32 * 1024, Duration.ofSeconds(1)); - - int numBuffersPerPartition = 64 * 1024 / 32; - int numForResultPartition = numBuffersPerPartition * 7 / 8; - int numForOutputGate = numBuffersPerPartition - numForResultPartition; - - List> factories = new ArrayList<>(); - factories.add( - () -> networkBufferPool.createBufferPool(numForResultPartition, numForResultPartition)); - factories.add(() -> networkBufferPool.createBufferPool(numForOutputGate, numForOutputGate)); - return factories; - } - - @Test - public void testWriteNormalRecordWithCompressionEnabled() throws Exception { - testWriteNormalRecord(true); - } - - @Test - public void testWriteNormalRecordWithCompressionDisabled() throws Exception { - testWriteNormalRecord(false); - } - - @Test - public void testWriteLargeRecord() throws Exception { - int numSubpartitions = 2; - int numBuffers = 100; - initResultPartitionWriter(numSubpartitions, 10, 200, false, conf, 10); - - partitionWriter.setup(); - - byte[] dataWritten = new byte[bufferSize * numBuffers]; - 
Random random = new Random(); - random.nextBytes(dataWritten); - ByteBuffer recordWritten = ByteBuffer.wrap(dataWritten); - partitionWriter.emitRecord(recordWritten, 0); - assertEquals(0, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.finish(); - partitionWriter.close(); - - List receivedBuffers = outputGate.getReceivedBuffers()[0]; - - ByteBuffer recordRead = ByteBuffer.allocate(bufferSize * numBuffers); - for (Buffer buffer : receivedBuffers) { - if (buffer.isBuffer()) { - recordRead.put( - buffer.getNioBuffer( - BufferUtils.HEADER_LENGTH, buffer.readableBytes() - BufferUtils.HEADER_LENGTH)); - } - } - recordWritten.rewind(); - recordRead.flip(); - assertEquals(recordWritten, recordRead); - } - - @Test - public void testBroadcastLargeRecord() throws Exception { - int numSubpartitions = 2; - int numBuffers = 100; - initResultPartitionWriter(numSubpartitions, 10, 200, false, conf, 10); - - partitionWriter.setup(); - - byte[] dataWritten = new byte[bufferSize * numBuffers]; - Random random = new Random(); - random.nextBytes(dataWritten); - ByteBuffer recordWritten = ByteBuffer.wrap(dataWritten); - partitionWriter.broadcastRecord(recordWritten); - assertEquals(0, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.finish(); - partitionWriter.close(); - - ByteBuffer recordRead0 = ByteBuffer.allocate(bufferSize * numBuffers); - for (Buffer buffer : outputGate.getReceivedBuffers()[0]) { - if (buffer.isBuffer()) { - recordRead0.put( - buffer.getNioBuffer( - BufferUtils.HEADER_LENGTH, buffer.readableBytes() - BufferUtils.HEADER_LENGTH)); - } - } - recordWritten.rewind(); - recordRead0.flip(); - assertEquals(recordWritten, recordRead0); - - ByteBuffer recordRead1 = ByteBuffer.allocate(bufferSize * numBuffers); - for (Buffer buffer : outputGate.getReceivedBuffers()[1]) { - if (buffer.isBuffer()) { - recordRead1.put( - buffer.getNioBuffer( - BufferUtils.HEADER_LENGTH, buffer.readableBytes() - BufferUtils.HEADER_LENGTH)); - } - } - 
recordWritten.rewind(); - recordRead1.flip(); - assertEquals(recordWritten, recordRead0); - } - - @Test - public void testFlush() throws Exception { - int numSubpartitions = 10; - - initResultPartitionWriter(numSubpartitions, 10, 20, false, conf, 10); - partitionWriter.setup(); - - partitionWriter.emitRecord(ByteBuffer.allocate(bufferSize), 0); - partitionWriter.emitRecord(ByteBuffer.allocate(bufferSize), 1); - assertEquals(3, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.broadcastRecord(ByteBuffer.allocate(bufferSize)); - assertEquals(2, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.flush(0); - assertEquals(0, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.emitRecord(ByteBuffer.allocate(bufferSize), 2); - partitionWriter.emitRecord(ByteBuffer.allocate(bufferSize), 3); - assertEquals(3, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.flushAll(); - assertEquals(0, sortBufferPool.bestEffortGetNumOfUsedBuffers()); - - partitionWriter.finish(); - partitionWriter.close(); - } - - private void testWriteNormalRecord(boolean compressionEnabled) throws Exception { - int numSubpartitions = 4; - int numRecords = 100; - Random random = new Random(); - - initResultPartitionWriter(numSubpartitions, 100, 500, compressionEnabled, conf, 10); - partitionWriter.setup(); - assertTrue(outputGate.isSetup()); - - Queue[] dataWritten = new Queue[numSubpartitions]; - IntStream.range(0, numSubpartitions).forEach(i -> dataWritten[i] = new ArrayDeque<>()); - int[] numBytesWritten = new int[numSubpartitions]; - Arrays.fill(numBytesWritten, 0); - - for (int i = 0; i < numRecords; i++) { - byte[] data = new byte[random.nextInt(2 * bufferSize) + 1]; - if (compressionEnabled) { - byte randomByte = (byte) random.nextInt(); - Arrays.fill(data, randomByte); - } else { - random.nextBytes(data); - } - ByteBuffer record = ByteBuffer.wrap(data); - boolean isBroadCast = random.nextBoolean(); - - if (isBroadCast) { - 
partitionWriter.broadcastRecord(record); - IntStream.range(0, numSubpartitions) - .forEach( - subpartition -> - recordDataWritten( - record, - Buffer.DataType.DATA_BUFFER, - subpartition, - dataWritten, - numBytesWritten)); - } else { - int subpartition = random.nextInt(numSubpartitions); - partitionWriter.emitRecord(record, subpartition); - recordDataWritten( - record, Buffer.DataType.DATA_BUFFER, subpartition, dataWritten, numBytesWritten); - } - } - - partitionWriter.finish(); - assertTrue(outputGate.isFinished()); - partitionWriter.close(); - assertTrue(outputGate.isClosed()); - - for (int subpartition = 0; subpartition < numSubpartitions; ++subpartition) { - ByteBuffer record = EventSerializer.toSerializedEvent(EndOfPartitionEvent.INSTANCE); - recordDataWritten( - record, Buffer.DataType.EVENT_BUFFER, subpartition, dataWritten, numBytesWritten); - } - - outputGate - .getFinishedRegions() - .forEach( - regionIndex -> assertTrue(outputGate.getNumBuffersByRegion().containsKey(regionIndex))); - - int[] numBytesRead = new int[numSubpartitions]; - List[] receivedBuffers = outputGate.getReceivedBuffers(); - List[] validateTarget = new List[numSubpartitions]; - Arrays.fill(numBytesRead, 0); - for (int i = 0; i < numSubpartitions; i++) { - validateTarget[i] = new ArrayList<>(); - for (Buffer buffer : receivedBuffers[i]) { - for (Buffer unpackedBuffer : BufferPacker.unpack(buffer.asByteBuf())) { - if (compressionEnabled && unpackedBuffer.isCompressed()) { - Buffer decompressedBuffer = - bufferDecompressor.decompressToIntermediateBuffer(unpackedBuffer); - ByteBuffer decompressed = decompressedBuffer.getNioBufferReadable(); - int numBytes = decompressed.remaining(); - MemorySegment segment = MemorySegmentFactory.allocateUnpooledSegment(numBytes); - segment.put(0, decompressed, numBytes); - decompressedBuffer.recycleBuffer(); - validateTarget[i].add( - new NetworkBuffer(segment, buf -> {}, unpackedBuffer.getDataType(), numBytes)); - numBytesRead[i] += numBytes; - } else { 
- numBytesRead[i] += buffer.readableBytes(); - validateTarget[i].add(buffer); - } - } - } - } - IntStream.range(0, numSubpartitions).forEach(subpartitions -> {}); - checkWriteReadResult( - numSubpartitions, numBytesWritten, numBytesWritten, dataWritten, validateTarget); - } - - private void initResultPartitionWriter( - int numSubpartitions, - int sortBufferPoolSize, - int nettyBufferPoolSize, - boolean compressionEnabled, - CelebornConf conf, - int numMappers) - throws Exception { - - sortBufferPool = globalBufferPool.createBufferPool(sortBufferPoolSize, sortBufferPoolSize); - nettyBufferPool = globalBufferPool.createBufferPool(nettyBufferPoolSize, nettyBufferPoolSize); - - outputGate = - new FakedRemoteShuffleOutputGate( - getShuffleDescriptor(), numSubpartitions, () -> nettyBufferPool, conf, numMappers); - outputGate.setup(); - - if (compressionEnabled) { - partitionWriter = - new RemoteShuffleResultPartition( - "RemoteShuffleResultPartitionWriterTest", - 0, - new ResultPartitionID(), - ResultPartitionType.BLOCKING, - numSubpartitions, - numSubpartitions, - bufferSize, - new ResultPartitionManager(), - bufferCompressor, - () -> sortBufferPool, - outputGate); - } else { - partitionWriter = - new RemoteShuffleResultPartition( - "RemoteShuffleResultPartitionWriterTest", - 0, - new ResultPartitionID(), - ResultPartitionType.BLOCKING, - numSubpartitions, - numSubpartitions, - bufferSize, - new ResultPartitionManager(), - null, - () -> sortBufferPool, - outputGate); - } - } - - private void recordDataWritten( - ByteBuffer record, - Buffer.DataType dataType, - int subpartition, - Queue[] dataWritten, - int[] numBytesWritten) { - - record.rewind(); - dataWritten[subpartition].add(new DataAndType(record, dataType)); - numBytesWritten[subpartition] += record.remaining(); - } - - private static class FakedRemoteShuffleOutputGate extends RemoteShuffleOutputGate { - - private boolean isSetup; - private boolean isFinished; - private boolean isClosed; - private final List[] 
receivedBuffers; - private final Map numBuffersByRegion; - private final Set finishedRegions; - private int currentRegionIndex; - private boolean currentIsBroadcast; - - FakedRemoteShuffleOutputGate( - RemoteShuffleDescriptor shuffleDescriptor, - int numSubpartitions, - SupplierWithException bufferPoolFactory, - CelebornConf celebornConf, - int numMappers) { - - super( - shuffleDescriptor, - numSubpartitions, - bufferSize, - bufferPoolFactory, - celebornConf, - numMappers); - isSetup = false; - isFinished = false; - isClosed = false; - numBuffersByRegion = new HashMap<>(); - finishedRegions = new HashSet<>(); - currentRegionIndex = -1; - receivedBuffers = new ArrayList[numSubpartitions]; - IntStream.range(0, numSubpartitions).forEach(i -> receivedBuffers[i] = new ArrayList<>()); - currentIsBroadcast = false; - } - - @Override - FlinkShuffleClientImpl getShuffleClient() { - FlinkShuffleClientImpl client = mock(FlinkShuffleClientImpl.class); - doNothing().when(client).cleanup(anyInt(), anyInt(), anyInt()); - return client; - } - - @Override - public void setup() throws IOException, InterruptedException { - bufferPool = bufferPoolFactory.get(); - isSetup = true; - } - - @Override - public void write(Buffer buffer, int subIdx) { - if (currentIsBroadcast) { - assertEquals(0, subIdx); - ByteBuffer byteBuffer = buffer.getNioBufferReadable(); - for (int i = 0; i < numSubs; i++) { - int numBytes = buffer.readableBytes(); - MemorySegment segment = MemorySegmentFactory.allocateUnpooledSegment(numBytes); - byteBuffer.rewind(); - segment.put(0, byteBuffer, numBytes); - receivedBuffers[i].add( - new NetworkBuffer( - segment, buf -> {}, buffer.getDataType(), buffer.isCompressed(), numBytes)); - } - buffer.recycleBuffer(); - } else { - receivedBuffers[subIdx].add(buffer); - } - if (numBuffersByRegion.containsKey(currentRegionIndex)) { - int prev = numBuffersByRegion.get(currentRegionIndex); - numBuffersByRegion.put(currentRegionIndex, prev + 1); - } else { - 
numBuffersByRegion.put(currentRegionIndex, 1); - } - } - - @Override - public void regionStart(boolean isBroadcast) { - currentIsBroadcast = isBroadcast; - currentRegionIndex++; - } - - @Override - public void regionFinish() { - if (finishedRegions.contains(currentRegionIndex)) { - throw new IllegalStateException("Unexpected region: " + currentRegionIndex); - } - finishedRegions.add(currentRegionIndex); - } - - @Override - public void finish() throws InterruptedException { - isFinished = true; - } - - @Override - public void close() { - isClosed = true; - } - - public List[] getReceivedBuffers() { - return receivedBuffers; - } - - public Map getNumBuffersByRegion() { - return numBuffersByRegion; - } - - public Set getFinishedRegions() { - return finishedRegions; - } - - public boolean isSetup() { - return isSetup; - } - - public boolean isFinished() { - return isFinished; - } - - public boolean isClosed() { - return isClosed; - } - - public void release() throws Exception { - IntStream.range(0, numSubs) - .forEach( - subpartitionIndex -> { - receivedBuffers[subpartitionIndex].forEach(Buffer::recycleBuffer); - receivedBuffers[subpartitionIndex].clear(); - }); - numBuffersByRegion.clear(); - finishedRegions.clear(); - super.close(); - } - } - - private RemoteShuffleDescriptor getShuffleDescriptor() { - Random random = new Random(); - byte[] bytes = new byte[16]; - random.nextBytes(bytes); - return new RemoteShuffleDescriptor( - new JobID(bytes).toString(), - new JobID(bytes), - new JobID(bytes).toString(), - new ResultPartitionID(), - new RemoteShuffleResource( - "1", 2, System.currentTimeMillis(), new ShuffleResourceDescriptor(1, 1, 1, 0))); - } - - /** Data written and its {@link Buffer.DataType}. 
*/ - public static class DataAndType { - private final ByteBuffer data; - private final Buffer.DataType dataType; - - DataAndType(ByteBuffer data, Buffer.DataType dataType) { - this.data = data; - this.dataType = dataType; - } - } - - public static void checkWriteReadResult( - int numSubpartitions, - int[] numBytesWritten, - int[] numBytesRead, - Queue[] dataWritten, - Collection[] buffersRead) { - for (int subpartitionIndex = 0; subpartitionIndex < numSubpartitions; ++subpartitionIndex) { - assertEquals(numBytesWritten[subpartitionIndex], numBytesRead[subpartitionIndex]); - - List eventsWritten = new ArrayList<>(); - List eventsRead = new ArrayList<>(); - - ByteBuffer subpartitionDataWritten = ByteBuffer.allocate(numBytesWritten[subpartitionIndex]); - for (DataAndType dataAndType : dataWritten[subpartitionIndex]) { - subpartitionDataWritten.put(dataAndType.data); - dataAndType.data.rewind(); - if (dataAndType.dataType.isEvent()) { - eventsWritten.add(dataAndType); - } - } - - ByteBuffer subpartitionDataRead = ByteBuffer.allocate(numBytesRead[subpartitionIndex]); - for (Buffer buffer : buffersRead[subpartitionIndex]) { - subpartitionDataRead.put(buffer.getNioBufferReadable()); - if (!buffer.isBuffer()) { - eventsRead.add(buffer); - } - } - - subpartitionDataWritten.flip(); - subpartitionDataRead.flip(); - assertEquals(subpartitionDataWritten, subpartitionDataRead); - - assertEquals(eventsWritten.size(), eventsRead.size()); - for (int i = 0; i < eventsWritten.size(); ++i) { - assertEquals(eventsWritten.get(i).dataType, eventsRead.get(i).getDataType()); - assertEquals(eventsWritten.get(i).data, eventsRead.get(i).getNioBufferReadable()); - } - } - } -} diff --git a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactorySuiteJ.java b/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactorySuiteJ.java deleted file mode 100644 index 49affdee4bd..00000000000 --- 
a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/RemoteShuffleServiceFactorySuiteJ.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import org.apache.flink.configuration.Configuration; -import org.apache.flink.configuration.MemorySize; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter; -import org.apache.flink.runtime.io.network.partition.consumer.IndexedInputGate; -import org.apache.flink.runtime.shuffle.ShuffleEnvironment; -import org.apache.flink.runtime.shuffle.ShuffleEnvironmentContext; -import org.junit.Assert; -import org.junit.Test; - -public class RemoteShuffleServiceFactorySuiteJ { - @Test - public void testCreateShuffleEnvironment() { - RemoteShuffleServiceFactory remoteShuffleServiceFactory = new RemoteShuffleServiceFactory(); - ShuffleEnvironmentContext shuffleEnvironmentContext = mock(ShuffleEnvironmentContext.class); - 
when(shuffleEnvironmentContext.getConfiguration()).thenReturn(new Configuration()); - when(shuffleEnvironmentContext.getNetworkMemorySize()) - .thenReturn(new MemorySize(64 * 1024 * 1024)); - MetricGroup parentMetric = mock(MetricGroup.class); - when(shuffleEnvironmentContext.getParentMetricGroup()).thenReturn(parentMetric); - MetricGroup childGroup = mock(MetricGroup.class); - MetricGroup childChildGroup = mock(MetricGroup.class); - when(parentMetric.addGroup(anyString())).thenReturn(childGroup); - when(childGroup.addGroup(any())).thenReturn(childChildGroup); - when(childChildGroup.gauge(any(), any())).thenReturn(null); - ShuffleEnvironment shuffleEnvironment = - remoteShuffleServiceFactory.createShuffleEnvironment(shuffleEnvironmentContext); - Assert.assertEquals( - 32 * 1024, - ((RemoteShuffleEnvironment) shuffleEnvironment) - .getResultPartitionFactory() - .getNetworkBufferSize()); - } -} diff --git a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/ShuffleResourceTrackerSuiteJ.java b/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/ShuffleResourceTrackerSuiteJ.java deleted file mode 100644 index b93ae3ea152..00000000000 --- a/client-flink/flink-1.15/src/test/java/org/apache/celeborn/plugin/flink/ShuffleResourceTrackerSuiteJ.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.celeborn.plugin.flink; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ScheduledThreadPoolExecutor; - -import com.google.common.collect.Sets; -import org.apache.flink.api.common.JobID; -import org.apache.flink.runtime.io.network.partition.ResultPartitionID; -import org.apache.flink.runtime.shuffle.JobShuffleContext; -import org.junit.Assert; -import org.junit.Test; -import org.mockito.Mockito; - -import org.apache.celeborn.client.LifecycleManager; -import org.apache.celeborn.client.listener.WorkersStatus; -import org.apache.celeborn.common.meta.ShufflePartitionLocationInfo; -import org.apache.celeborn.common.meta.WorkerInfo; -import org.apache.celeborn.common.protocol.PartitionLocation; -import org.apache.celeborn.common.util.JavaUtils; - -public class ShuffleResourceTrackerSuiteJ { - - @Test - public void testNotifyUnknownWorkers() { - LifecycleManager lifecycleManager = Mockito.mock(LifecycleManager.class); - ScheduledThreadPoolExecutor executor = Mockito.mock(ScheduledThreadPoolExecutor.class); - - ConcurrentHashMap map = JavaUtils.newConcurrentHashMap(); - WorkerInfo workerInfo = new WorkerInfo("mock", -1, -1, -1, -1); - map.put(workerInfo.toUniqueId(), mockShufflePartitionLocationInfo(workerInfo)); - - ConcurrentHashMap map2 = JavaUtils.newConcurrentHashMap(); - map2.put(workerInfo.toUniqueId(), mockShufflePartitionLocationInfo(workerInfo)); - - 
ConcurrentHashMap map3 = JavaUtils.newConcurrentHashMap(); - map3.put(workerInfo.toUniqueId(), mockShufflePartitionLocationInfo(workerInfo)); - - Mockito.when(lifecycleManager.workerSnapshots(Mockito.anyInt())).thenReturn(map, map2, map3); - - ShuffleResourceTracker shuffleResourceTracker = - new ShuffleResourceTracker(executor, lifecycleManager); - - JobID jobID1 = new JobID(); - shuffleResourceTracker.registerJob(createJobShuffleContext(jobID1)); - shuffleResourceTracker.addPartitionResource(jobID1, 1, 1, new ResultPartitionID()); - shuffleResourceTracker.addPartitionResource(jobID1, 1, 2, new ResultPartitionID()); - shuffleResourceTracker.addPartitionResource(jobID1, 1, 3, new ResultPartitionID()); - shuffleResourceTracker.addPartitionResource(jobID1, 2, 3, new ResultPartitionID()); - - JobID jobID2 = new JobID(); - shuffleResourceTracker.registerJob(createJobShuffleContext(jobID2)); - shuffleResourceTracker.addPartitionResource(jobID2, 3, 1, new ResultPartitionID()); - - List workerInfoList = new ArrayList<>(); - workerInfoList.add(workerInfo); - shuffleResourceTracker.notifyChangedWorkersStatus(new WorkersStatus(workerInfoList, null)); - - Assert.assertEquals( - Sets.newHashSet(3), - shuffleResourceTracker - .getJobResourceListener(jobID1) - .getResultPartitionMap() - .get(2) - .keySet()); - Assert.assertEquals( - Sets.newHashSet(3), - shuffleResourceTracker - .getJobResourceListener(jobID1) - .getResultPartitionMap() - .get(2) - .keySet()); - - Assert.assertTrue( - shuffleResourceTracker - .getJobResourceListener(jobID2) - .getResultPartitionMap() - .get(3) - .isEmpty()); - } - - public ShufflePartitionLocationInfo mockShufflePartitionLocationInfo(WorkerInfo workerInfo) { - ShufflePartitionLocationInfo shufflePartitionLocationInfo = - new ShufflePartitionLocationInfo(workerInfo); - - List primaryLocations = new ArrayList<>(); - primaryLocations.add(mockShufflePartitionLocationInfo(1)); - primaryLocations.add(mockShufflePartitionLocationInfo(2)); - - List 
replicaLocations = new ArrayList<>(); - replicaLocations.add(mockShufflePartitionLocationInfo(3)); - replicaLocations.add(mockShufflePartitionLocationInfo(4)); - - shufflePartitionLocationInfo.addPrimaryPartitions(primaryLocations); - shufflePartitionLocationInfo.addReplicaPartitions(replicaLocations); - return shufflePartitionLocationInfo; - } - - public JobShuffleContext createJobShuffleContext(JobID jobId) { - return new JobShuffleContext() { - @Override - public JobID getJobId() { - return jobId; - } - - @Override - public CompletableFuture stopTrackingAndReleasePartitions( - Collection collection) { - return CompletableFuture.completedFuture(null); - } - }; - } - - private PartitionLocation mockShufflePartitionLocationInfo(int partitionId) { - return new PartitionLocation( - partitionId, -1, "mock", -1, -1, -1, -1, PartitionLocation.Mode.PRIMARY); - } -} diff --git a/dev/dependencies.sh b/dev/dependencies.sh index f0bd888ace8..a9d7ab30040 100755 --- a/dev/dependencies.sh +++ b/dev/dependencies.sh @@ -172,14 +172,6 @@ case "$MODULE" in MVN_MODULES="client-spark/spark-3-4" SBT_PROJECT="celeborn-client-spark-3" ;; - "flink-1.14") - MVN_MODULES="client-flink/flink-1.14" - SBT_PROJECT="celeborn-client-flink-1_14" - ;; - "flink-1.15") - MVN_MODULES="client-flink/flink-1.15" - SBT_PROJECT="celeborn-client-flink-1_15" - ;; "flink-1.16") MVN_MODULES="client-flink/flink-1.16" SBT_PROJECT="celeborn-client-flink-1_16" diff --git a/dev/deps/dependencies-client-flink-1.14 b/dev/deps/dependencies-client-flink-1.14 deleted file mode 100644 index 4f786dd8046..00000000000 --- a/dev/deps/dependencies-client-flink-1.14 +++ /dev/null @@ -1,81 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -RoaringBitmap/1.0.6//RoaringBitmap-1.0.6.jar -commons-crypto/1.0.0//commons-crypto-1.0.0.jar -commons-io/2.17.0//commons-io-2.17.0.jar -commons-lang3/3.17.0//commons-lang3-3.17.0.jar -commons-logging/1.1.3//commons-logging-1.1.3.jar -failureaccess/1.0.2//failureaccess-1.0.2.jar -guava/33.1.0-jre//guava-33.1.0-jre.jar -hadoop-client-api/3.3.6//hadoop-client-api-3.3.6.jar -hadoop-client-runtime/3.3.6//hadoop-client-runtime-3.3.6.jar -jackson-annotations/2.15.3//jackson-annotations-2.15.3.jar -jackson-core/2.15.3//jackson-core-2.15.3.jar -jackson-databind/2.15.3//jackson-databind-2.15.3.jar -jackson-module-scala_2.12/2.15.3//jackson-module-scala_2.12-2.15.3.jar -jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar -jsr305/1.3.9//jsr305-1.3.9.jar -jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar -leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar -maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/4.2.25//metrics-core-4.2.25.jar -metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar -metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar -netty-all/4.1.115.Final//netty-all-4.1.115.Final.jar -netty-buffer/4.1.115.Final//netty-buffer-4.1.115.Final.jar -netty-codec-dns/4.1.115.Final//netty-codec-dns-4.1.115.Final.jar -netty-codec-haproxy/4.1.115.Final//netty-codec-haproxy-4.1.115.Final.jar -netty-codec-http/4.1.115.Final//netty-codec-http-4.1.115.Final.jar 
-netty-codec-http2/4.1.115.Final//netty-codec-http2-4.1.115.Final.jar -netty-codec-memcache/4.1.115.Final//netty-codec-memcache-4.1.115.Final.jar -netty-codec-mqtt/4.1.115.Final//netty-codec-mqtt-4.1.115.Final.jar -netty-codec-redis/4.1.115.Final//netty-codec-redis-4.1.115.Final.jar -netty-codec-smtp/4.1.115.Final//netty-codec-smtp-4.1.115.Final.jar -netty-codec-socks/4.1.115.Final//netty-codec-socks-4.1.115.Final.jar -netty-codec-stomp/4.1.115.Final//netty-codec-stomp-4.1.115.Final.jar -netty-codec-xml/4.1.115.Final//netty-codec-xml-4.1.115.Final.jar -netty-codec/4.1.115.Final//netty-codec-4.1.115.Final.jar -netty-common/4.1.115.Final//netty-common-4.1.115.Final.jar -netty-handler-proxy/4.1.115.Final//netty-handler-proxy-4.1.115.Final.jar -netty-handler/4.1.115.Final//netty-handler-4.1.115.Final.jar -netty-resolver-dns-classes-macos/4.1.115.Final//netty-resolver-dns-classes-macos-4.1.115.Final.jar -netty-resolver-dns-native-macos/4.1.115.Final/osx-aarch_64/netty-resolver-dns-native-macos-4.1.115.Final-osx-aarch_64.jar -netty-resolver-dns-native-macos/4.1.115.Final/osx-x86_64/netty-resolver-dns-native-macos-4.1.115.Final-osx-x86_64.jar -netty-resolver-dns/4.1.115.Final//netty-resolver-dns-4.1.115.Final.jar -netty-resolver/4.1.115.Final//netty-resolver-4.1.115.Final.jar -netty-transport-classes-epoll/4.1.115.Final//netty-transport-classes-epoll-4.1.115.Final.jar -netty-transport-classes-kqueue/4.1.115.Final//netty-transport-classes-kqueue-4.1.115.Final.jar -netty-transport-native-epoll/4.1.115.Final/linux-aarch_64/netty-transport-native-epoll-4.1.115.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.115.Final/linux-riscv64/netty-transport-native-epoll-4.1.115.Final-linux-riscv64.jar -netty-transport-native-epoll/4.1.115.Final/linux-x86_64/netty-transport-native-epoll-4.1.115.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.115.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.115.Final-osx-aarch_64.jar 
-netty-transport-native-kqueue/4.1.115.Final/osx-x86_64/netty-transport-native-kqueue-4.1.115.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.115.Final//netty-transport-native-unix-common-4.1.115.Final.jar -netty-transport-rxtx/4.1.115.Final//netty-transport-rxtx-4.1.115.Final.jar -netty-transport-sctp/4.1.115.Final//netty-transport-sctp-4.1.115.Final.jar -netty-transport-udt/4.1.115.Final//netty-transport-udt-4.1.115.Final.jar -netty-transport/4.1.115.Final//netty-transport-4.1.115.Final.jar -paranamer/2.8//paranamer-2.8.jar -protobuf-java/3.25.5//protobuf-java-3.25.5.jar -scala-library/2.12.18//scala-library-2.12.18.jar -scala-reflect/2.12.18//scala-reflect-2.12.18.jar -slf4j-api/1.7.36//slf4j-api-1.7.36.jar -snakeyaml/2.2//snakeyaml-2.2.jar -snappy-java/1.1.10.5//snappy-java-1.1.10.5.jar -zstd-jni/1.5.2-1//zstd-jni-1.5.2-1.jar diff --git a/dev/deps/dependencies-client-flink-1.15 b/dev/deps/dependencies-client-flink-1.15 deleted file mode 100644 index 4f786dd8046..00000000000 --- a/dev/deps/dependencies-client-flink-1.15 +++ /dev/null @@ -1,81 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -RoaringBitmap/1.0.6//RoaringBitmap-1.0.6.jar -commons-crypto/1.0.0//commons-crypto-1.0.0.jar -commons-io/2.17.0//commons-io-2.17.0.jar -commons-lang3/3.17.0//commons-lang3-3.17.0.jar -commons-logging/1.1.3//commons-logging-1.1.3.jar -failureaccess/1.0.2//failureaccess-1.0.2.jar -guava/33.1.0-jre//guava-33.1.0-jre.jar -hadoop-client-api/3.3.6//hadoop-client-api-3.3.6.jar -hadoop-client-runtime/3.3.6//hadoop-client-runtime-3.3.6.jar -jackson-annotations/2.15.3//jackson-annotations-2.15.3.jar -jackson-core/2.15.3//jackson-core-2.15.3.jar -jackson-databind/2.15.3//jackson-databind-2.15.3.jar -jackson-module-scala_2.12/2.15.3//jackson-module-scala_2.12-2.15.3.jar -jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar -jsr305/1.3.9//jsr305-1.3.9.jar -jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar -leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar -maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/4.2.25//metrics-core-4.2.25.jar -metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar -metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar -netty-all/4.1.115.Final//netty-all-4.1.115.Final.jar -netty-buffer/4.1.115.Final//netty-buffer-4.1.115.Final.jar -netty-codec-dns/4.1.115.Final//netty-codec-dns-4.1.115.Final.jar -netty-codec-haproxy/4.1.115.Final//netty-codec-haproxy-4.1.115.Final.jar -netty-codec-http/4.1.115.Final//netty-codec-http-4.1.115.Final.jar -netty-codec-http2/4.1.115.Final//netty-codec-http2-4.1.115.Final.jar -netty-codec-memcache/4.1.115.Final//netty-codec-memcache-4.1.115.Final.jar -netty-codec-mqtt/4.1.115.Final//netty-codec-mqtt-4.1.115.Final.jar -netty-codec-redis/4.1.115.Final//netty-codec-redis-4.1.115.Final.jar -netty-codec-smtp/4.1.115.Final//netty-codec-smtp-4.1.115.Final.jar -netty-codec-socks/4.1.115.Final//netty-codec-socks-4.1.115.Final.jar -netty-codec-stomp/4.1.115.Final//netty-codec-stomp-4.1.115.Final.jar -netty-codec-xml/4.1.115.Final//netty-codec-xml-4.1.115.Final.jar 
-netty-codec/4.1.115.Final//netty-codec-4.1.115.Final.jar -netty-common/4.1.115.Final//netty-common-4.1.115.Final.jar -netty-handler-proxy/4.1.115.Final//netty-handler-proxy-4.1.115.Final.jar -netty-handler/4.1.115.Final//netty-handler-4.1.115.Final.jar -netty-resolver-dns-classes-macos/4.1.115.Final//netty-resolver-dns-classes-macos-4.1.115.Final.jar -netty-resolver-dns-native-macos/4.1.115.Final/osx-aarch_64/netty-resolver-dns-native-macos-4.1.115.Final-osx-aarch_64.jar -netty-resolver-dns-native-macos/4.1.115.Final/osx-x86_64/netty-resolver-dns-native-macos-4.1.115.Final-osx-x86_64.jar -netty-resolver-dns/4.1.115.Final//netty-resolver-dns-4.1.115.Final.jar -netty-resolver/4.1.115.Final//netty-resolver-4.1.115.Final.jar -netty-transport-classes-epoll/4.1.115.Final//netty-transport-classes-epoll-4.1.115.Final.jar -netty-transport-classes-kqueue/4.1.115.Final//netty-transport-classes-kqueue-4.1.115.Final.jar -netty-transport-native-epoll/4.1.115.Final/linux-aarch_64/netty-transport-native-epoll-4.1.115.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.115.Final/linux-riscv64/netty-transport-native-epoll-4.1.115.Final-linux-riscv64.jar -netty-transport-native-epoll/4.1.115.Final/linux-x86_64/netty-transport-native-epoll-4.1.115.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.115.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.115.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.1.115.Final/osx-x86_64/netty-transport-native-kqueue-4.1.115.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.115.Final//netty-transport-native-unix-common-4.1.115.Final.jar -netty-transport-rxtx/4.1.115.Final//netty-transport-rxtx-4.1.115.Final.jar -netty-transport-sctp/4.1.115.Final//netty-transport-sctp-4.1.115.Final.jar -netty-transport-udt/4.1.115.Final//netty-transport-udt-4.1.115.Final.jar -netty-transport/4.1.115.Final//netty-transport-4.1.115.Final.jar -paranamer/2.8//paranamer-2.8.jar -protobuf-java/3.25.5//protobuf-java-3.25.5.jar 
-scala-library/2.12.18//scala-library-2.12.18.jar -scala-reflect/2.12.18//scala-reflect-2.12.18.jar -slf4j-api/1.7.36//slf4j-api-1.7.36.jar -snakeyaml/2.2//snakeyaml-2.2.jar -snappy-java/1.1.10.5//snappy-java-1.1.10.5.jar -zstd-jni/1.5.2-1//zstd-jni-1.5.2-1.jar diff --git a/dev/reformat b/dev/reformat index 85c4cde0869..260a3ab77e6 100755 --- a/dev/reformat +++ b/dev/reformat @@ -23,8 +23,6 @@ PROJECT_DIR="$(cd "`dirname "$0"`/.."; pwd)" if [ "$1" == "--web" ]; then ${PROJECT_DIR}/build/mvn clean install -pl web else - ${PROJECT_DIR}/build/mvn spotless:apply -Pflink-1.14 - ${PROJECT_DIR}/build/mvn spotless:apply -Pflink-1.15 ${PROJECT_DIR}/build/mvn spotless:apply -Pflink-1.16 ${PROJECT_DIR}/build/mvn spotless:apply -Pflink-1.17 ${PROJECT_DIR}/build/mvn spotless:apply -Pflink-1.18 diff --git a/docs/README.md b/docs/README.md index ea61b7e9c71..38b4fb3e117 100644 --- a/docs/README.md +++ b/docs/README.md @@ -122,7 +122,7 @@ INFO [async-reply] Controller: CommitFiles for local-1690000152711-0 success wit **Important: Only Flink batch jobs are supported for now.** #### Copy Celeborn Client to Flink's lib -Celeborn release binary contains clients for Flink 1.14.x, Flink 1.15.x, Flink 1.16.x, Flink 1.17.x, Flink 1.18.x, Flink 1.19.x and Flink 1.20.x, copy the corresponding client jar into Flink's +Celeborn release binary contains clients for Flink 1.16.x, Flink 1.17.x, Flink 1.18.x, Flink 1.19.x and Flink 1.20.x, copy the corresponding client jar into Flink's `lib/` directory: ```shell cp $CELEBORN_HOME/flink/celeborn-client-flink--shaded_-.jar $FLINK_HOME/lib/ @@ -130,7 +130,7 @@ cp $CELEBORN_HOME/flink/celeborn-client-flink--shaded_-shaded_-.jar` to `$FLINK_HOME/lib/`. ### Flink Configuration -Celeborn supports two Flink integration strategies: remote shuffle service (since Flink 1.14) and [hybrid shuffle](https://nightlies.apache.org/flink/flink-docs-stable/docs/ops/batch/batch_shuffle/#hybrid-shuffle) (since Flink 1.20). 
+Celeborn supports two Flink integration strategies: remote shuffle service (since Flink 1.16) and [hybrid shuffle](https://nightlies.apache.org/flink/flink-docs-stable/docs/ops/batch/batch_shuffle/#hybrid-shuffle) (since Flink 1.20). To use Celeborn, you can choose one of them and add the following Flink configurations. diff --git a/docs/developers/overview.md b/docs/developers/overview.md index c7b866b011f..b265f4d9edb 100644 --- a/docs/developers/overview.md +++ b/docs/developers/overview.md @@ -89,7 +89,7 @@ Celeborn's primary components(i.e. Master, Worker, Client) are engine irrelevant and easy to implement plugins for various engines. Currently, Celeborn officially supports [Spark](https://spark.apache.org/)(both Spark 2.x and Spark 3.x), -[Flink](https://flink.apache.org/)(1.14/1.15/1.16/1.17/1.18/1.19), and +[Flink](https://flink.apache.org/)(1.16/1.17/1.18/1.19), and [Gluten](https://github.com/apache/incubator-gluten). Also, developers are integrating Celeborn with other engines, for example [MR3](https://mr3docs.datamonad.com/docs/mr3/). diff --git a/docs/developers/sbt.md b/docs/developers/sbt.md index 0bca08f3752..920e1be0142 100644 --- a/docs/developers/sbt.md +++ b/docs/developers/sbt.md @@ -35,8 +35,6 @@ The following table indicates the compatibility of Celeborn Spark and Flink clie | Spark 3.3 | ❌ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | | Spark 3.4 | ❌ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | | Spark 3.5 | ❌ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| Flink 1.14 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | -| Flink 1.15 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | | Flink 1.16 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | | Flink 1.17 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | | Flink 1.18 | ❌ | ✔ | ✔ | ❌ | ❌ | ❌ | ❌ | @@ -81,17 +79,17 @@ We have adopted the Maven-style profile management for our Client module. 
For ex [info] celeborn-worker ``` -To enable the Flink 1.15 client module, add `-Pflink-1.15`: +To enable the Flink 1.16 client module, add `-Pflink-1.16`: ``` -# ./build/sbt -Pflink-1.15 projects +# ./build/sbt -Pflink-1.16 projects [info] set current project to celeborn (in build file:/root/celeborn/) [info] In file:/root/celeborn/ [info] * celeborn [info] celeborn-client -[info] celeborn-client-flink-1_15 -[info] celeborn-client-flink-1_15-shaded +[info] celeborn-client-flink-1_16 +[info] celeborn-client-flink-1_16-shaded [info] celeborn-common [info] celeborn-flink-common [info] celeborn-flink-group @@ -116,15 +114,15 @@ $ # Or, you can use sbt directly with the `-Pspark-3.3` profile: $ ./build/sbt -Pspark-3.3 celeborn-client-spark-3-shaded/assembly ``` -Similarly, you can build the Flink 1.15 client assembly jar using the following commands: +Similarly, you can build the Flink 1.16 client assembly jar using the following commands: ```shell -$ ./build/sbt -Pflink-1.15 -> project celeborn-client-flink-1_15-shaded +$ ./build/sbt -Pflink-1.16 +> project celeborn-client-flink-1_16-shaded > assembly -$ # Or, you can use sbt directly with the `-Pflink-1.15` profile: -$ ./build/sbt -Pflink-1.15 celeborn-client-flink-1_15-shaded/assembly +$ # Or, you can use sbt directly with the `-Pflink-1.16` profile: +$ ./build/sbt -Pflink-1.16 celeborn-client-flink-1_16-shaded/assembly ``` By executing these commands, you will create assembly jar files for the respective Spark and Flink client modules. The assembly jar bundles all the dependencies, allowing the client module to be used independently with all required dependencies included. 
@@ -162,13 +160,13 @@ $ # only run spark client related modules tests $ ./build/sbt -Pspark-3.3 celeborn-spark-group/test ``` -Similarly, to run the test cases for the Flink 1.15 client, use the following command: +Similarly, to run the test cases for the Flink 1.16 client, use the following command: ```shell -$ ./build/sbt -Pflink-1.15 test +$ ./build/sbt -Pflink-1.16 test $ # only run flink client related modules tests -$ ./build/sbt -Pflink-1.15 celeborn-flink-group/test +$ ./build/sbt -Pflink-1.16 celeborn-flink-group/test ``` ### Running Individual Tests diff --git a/docs/migration.md b/docs/migration.md index 570f00f7c7f..7390fe8d717 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -79,6 +79,8 @@ license: | topK(50, sum by (applicationId) (metrics_diskBytesWritten_Value{role="worker", applicationId!=""})) ``` +- Since 0.6.0, the out-of-dated Flink 1.14 and Flink 1.15 have been removed from the official support list. + ## Upgrading from 0.5.0 to 0.5.1 - Since 0.5.1, Celeborn master REST API `/exclude` request uses media type `application/x-www-form-urlencoded` instead of `text/plain`. 
diff --git a/pom.xml b/pom.xml index 645090e960c..326fdea206c 100644 --- a/pom.xml +++ b/pom.xml @@ -66,7 +66,7 @@ ${java.version} 3.9.9 - 1.14.6 + 1.20.0 3.3.4 @@ -1663,45 +1663,6 @@ - - flink-1.14 - - client-flink/common - client-flink/flink-1.14 - client-flink/flink-1.14-shaded - tests/flink-it - - - 1.14.6 - 2.12 - celeborn-client-flink-1.14_${scala.binary.version} - flink-streaming-java_${scala.binary.version} - flink-clients_${scala.binary.version} - flink-scala_${scala.binary.version} - flink-runtime-web_${scala.binary.version} - - - - - flink-1.15 - - client-flink/common - client-flink/flink-1.15 - client-flink/flink-1.15-shaded - tests/flink-it - - - 1.15.4 - 1.15 - 2.12 - celeborn-client-flink-1.15_${scala.binary.version} - flink-streaming-java - flink-clients - flink-scala_${scala.binary.version} - flink-runtime-web - - - flink-1.16 diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 80c45084e8a..cd44db10334 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -480,8 +480,6 @@ object Utils { val FLINK_VERSION = profiles.filter(_.startsWith("flink")).headOption lazy val flinkClientProjects = FLINK_VERSION match { - case Some("flink-1.14") => Some(Flink114) - case Some("flink-1.15") => Some(Flink115) case Some("flink-1.16") => Some(Flink116) case Some("flink-1.17") => Some(Flink117) case Some("flink-1.18") => Some(Flink118) @@ -1077,30 +1075,6 @@ trait SparkClientProjects { // Flink Client // //////////////////////////////////////////////////////// -object Flink114 extends FlinkClientProjects { - val flinkVersion = "1.14.6" - - // note that SBT does not allow using the period symbol (.) in project names. 
- val flinkClientProjectPath = "client-flink/flink-1.14" - val flinkClientProjectName = "celeborn-client-flink-1_14" - val flinkClientShadedProjectPath: String = "client-flink/flink-1.14-shaded" - val flinkClientShadedProjectName: String = "celeborn-client-flink-1_14-shaded" - - override lazy val flinkStreamingDependency: ModuleID = "org.apache.flink" %% "flink-streaming-java" % flinkVersion % "test" - override lazy val flinkClientsDependency: ModuleID = "org.apache.flink" %% "flink-clients" % flinkVersion % "test" - override lazy val flinkRuntimeWebDependency: ModuleID = "org.apache.flink" %% "flink-runtime-web" % flinkVersion % "test" -} - -object Flink115 extends FlinkClientProjects { - val flinkVersion = "1.15.4" - - // note that SBT does not allow using the period symbol (.) in project names. - val flinkClientProjectPath = "client-flink/flink-1.15" - val flinkClientProjectName = "celeborn-client-flink-1_15" - val flinkClientShadedProjectPath: String = "client-flink/flink-1.15-shaded" - val flinkClientShadedProjectName: String = "celeborn-client-flink-1_15-shaded" -} - object Flink116 extends FlinkClientProjects { val flinkVersion = "1.16.3" @@ -1176,8 +1150,6 @@ trait FlinkClientProjects { // 1.19.1 -> 1.19 // 1.18.1 -> 1.18 // 1.17.2 -> 1.17 - // 1.15.4 -> 1.15 - // 1.14.6 -> 1.14 lazy val flinkMajorVersion: String = flinkVersion.split("\\.").take(2).reduce(_ + "." + _) // the output would be something like: celeborn-client-flink-1.17-shaded_2.12-0.4.0-SNAPSHOT.jar diff --git a/tests/flink-it/src/test/java/org/apache/celeborn/tests/flink/FlinkVersion.java b/tests/flink-it/src/test/java/org/apache/celeborn/tests/flink/FlinkVersion.java index 81eb392710e..7a9d76e4de3 100644 --- a/tests/flink-it/src/test/java/org/apache/celeborn/tests/flink/FlinkVersion.java +++ b/tests/flink-it/src/test/java/org/apache/celeborn/tests/flink/FlinkVersion.java @@ -23,8 +23,6 @@ /** All supported flink versions. 
*/ @Public public enum FlinkVersion { - v1_14("1.14"), - v1_15("1.15"), v1_16("1.16"), v1_17("1.17"), v1_18("1.18"), @@ -39,10 +37,6 @@ public enum FlinkVersion { public static FlinkVersion fromVersionStr(String versionStr) { switch (versionStr) { - case "1.14": - return v1_14; - case "1.15": - return v1_15; case "1.16": return v1_16; case "1.17":