From 07cdcba44ed99cd90f109ba62df6d50ba67bbf59 Mon Sep 17 00:00:00 2001 From: Qi Yu Date: Fri, 27 Dec 2024 16:18:19 +0800 Subject: [PATCH] [#5585] improvement(bundles): Refactor bundle jars and provide core jars that does not contains hadoop-{aws,gcp,aliyun,azure} (#5806) ### What changes were proposed in this pull request? Provide another kind of bundle jar that does not contain hadoop-{aws,gcp,aliyun,azure}, like aws-mini and gcp-mini. ### Why are the changes needed? To make it work with a wide range of Hadoop versions. Fix: #5585 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? Existing UTs and ITs --- .../authorization-chain/build.gradle.kts | 10 +-- .../authorization-ranger/build.gradle.kts | 11 +-- build.gradle.kts | 8 +- bundles/aliyun-bundle/build.gradle.kts | 38 +++----- bundles/aliyun/build.gradle.kts | 87 +++++++++++++++++++ .../oss/credential/OSSSecretKeyProvider.java | 0 .../oss/credential/OSSTokenProvider.java | 0 .../oss/credential/policy/Condition.java | 0 .../oss/credential/policy/Effect.java | 0 .../oss/credential/policy/Policy.java | 0 .../oss/credential/policy/Statement.java | 0 .../oss/credential/policy/StringLike.java | 0 .../oss/fs/OSSFileSystemProvider.java | 0 ...itino.catalog.hadoop.fs.FileSystemProvider | 0 ...he.gravitino.credential.CredentialProvider | 0 bundles/aws-bundle/build.gradle.kts | 24 ++--- bundles/aws/build.gradle.kts | 68 +++++++++++++++ .../s3/credential/S3SecretKeyProvider.java | 0 .../s3/credential/S3TokenProvider.java | 0 .../gravitino/s3/fs/S3FileSystemProvider.java | 62 ++++++++++++- ...itino.catalog.hadoop.fs.FileSystemProvider | 0 ...he.gravitino.credential.CredentialProvider | 0 bundles/azure-bundle/build.gradle.kts | 25 ++---- bundles/azure/build.gradle.kts | 72 +++++++++++++++ .../abs/credential/ADLSLocationUtils.java | 0 .../abs/credential/ADLSTokenProvider.java | 0 .../credential/AzureAccountKeyProvider.java | 0 .../abs/fs/AzureFileSystemProvider.java | 0 
...itino.catalog.hadoop.fs.FileSystemProvider | 0 ...he.gravitino.credential.CredentialProvider | 0 bundles/gcp-bundle/build.gradle.kts | 24 ++--- .../org.apache.hadoop.fs.FileSystem | 0 bundles/gcp/build.gradle.kts | 69 +++++++++++++++ .../gcs/credential/GCSTokenProvider.java | 0 .../gcs/fs/GCSFileSystemProvider.java | 7 +- ...itino.catalog.hadoop.fs.FileSystemProvider | 0 ...he.gravitino.credential.CredentialProvider | 0 .../gravitino/catalog/hadoop/Constants.java | 26 ++++++ catalogs/catalog-hadoop/build.gradle.kts | 47 +++------- catalogs/catalog-hive/build.gradle.kts | 3 + catalogs/hadoop-common/build.gradle.kts | 5 +- .../catalog/hadoop/fs/FileSystemUtils.java | 6 +- .../build.gradle.kts | 3 + clients/filesystem-hadoop3/build.gradle.kts | 26 +++--- .../hadoop/GravitinoVirtualFileSystem.java | 24 +++++ docs/hadoop-catalog.md | 14 ++- docs/how-to-use-gvfs.md | 65 ++++++++++++-- gradle/libs.versions.toml | 12 ++- iceberg/iceberg-rest-server/build.gradle.kts | 8 +- integration-test-common/build.gradle.kts | 8 +- settings.gradle.kts | 10 +-- 51 files changed, 570 insertions(+), 192 deletions(-) create mode 100644 bundles/aliyun/build.gradle.kts rename bundles/{aliyun-bundle => aliyun}/src/main/java/org/apache/gravitino/oss/credential/OSSSecretKeyProvider.java (100%) rename bundles/{aliyun-bundle => aliyun}/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java (100%) rename bundles/{aliyun-bundle => aliyun}/src/main/java/org/apache/gravitino/oss/credential/policy/Condition.java (100%) rename bundles/{aliyun-bundle => aliyun}/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java (100%) rename bundles/{aliyun-bundle => aliyun}/src/main/java/org/apache/gravitino/oss/credential/policy/Policy.java (100%) rename bundles/{aliyun-bundle => aliyun}/src/main/java/org/apache/gravitino/oss/credential/policy/Statement.java (100%) rename bundles/{aliyun-bundle => aliyun}/src/main/java/org/apache/gravitino/oss/credential/policy/StringLike.java 
(100%) rename bundles/{aliyun-bundle => aliyun}/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java (100%) rename bundles/{aliyun-bundle => aliyun}/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider (100%) rename bundles/{aliyun-bundle => aliyun}/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider (100%) create mode 100644 bundles/aws/build.gradle.kts rename bundles/{aws-bundle => aws}/src/main/java/org/apache/gravitino/s3/credential/S3SecretKeyProvider.java (100%) rename bundles/{aws-bundle => aws}/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java (100%) rename bundles/{aws-bundle => aws}/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java (53%) rename bundles/{aws-bundle => aws}/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider (100%) rename bundles/{aws-bundle => aws}/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider (100%) create mode 100644 bundles/azure/build.gradle.kts rename bundles/{azure-bundle => azure}/src/main/java/org/apache/gravitino/abs/credential/ADLSLocationUtils.java (100%) rename bundles/{azure-bundle => azure}/src/main/java/org/apache/gravitino/abs/credential/ADLSTokenProvider.java (100%) rename bundles/{azure-bundle => azure}/src/main/java/org/apache/gravitino/abs/credential/AzureAccountKeyProvider.java (100%) rename bundles/{azure-bundle => azure}/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java (100%) rename bundles/{azure-bundle => azure}/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider (100%) rename bundles/{azure-bundle => azure}/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider (100%) rename bundles/gcp-bundle/src/main/resources/{META-INF/services => }/org.apache.hadoop.fs.FileSystem (100%) create mode 100644 
bundles/gcp/build.gradle.kts rename bundles/{gcp-bundle => gcp}/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java (100%) rename bundles/{gcp-bundle => gcp}/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java (85%) rename bundles/{gcp-bundle => gcp}/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider (100%) rename bundles/{gcp-bundle => gcp}/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider (100%) create mode 100644 catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/hadoop/Constants.java rename catalogs/{catalog-hadoop => hadoop-common}/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java (95%) diff --git a/authorizations/authorization-chain/build.gradle.kts b/authorizations/authorization-chain/build.gradle.kts index d5cd160742c..e14cfa05ba9 100644 --- a/authorizations/authorization-chain/build.gradle.kts +++ b/authorizations/authorization-chain/build.gradle.kts @@ -81,6 +81,7 @@ dependencies { exclude("net.java.dev.jna") exclude("javax.ws.rs") exclude("org.eclipse.jetty") + exclude("org.apache.hadoop", "hadoop-common") } testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") { @@ -93,11 +94,10 @@ dependencies { testImplementation("org.apache.kyuubi:kyuubi-spark-authz-shaded_$scalaVersion:$kyuubiVersion") { exclude("com.sun.jersey") } - testImplementation(libs.hadoop3.client) - testImplementation(libs.hadoop3.common) { - exclude("com.sun.jersey") - exclude("javax.servlet", "servlet-api") - } + + testImplementation(libs.hadoop3.client.api) + testImplementation(libs.hadoop3.client.runtime) + testImplementation(libs.hadoop3.hdfs) { exclude("com.sun.jersey") exclude("javax.servlet", "servlet-api") diff --git a/authorizations/authorization-ranger/build.gradle.kts b/authorizations/authorization-ranger/build.gradle.kts 
index d410b1ee8d4..8cc82250c23 100644 --- a/authorizations/authorization-ranger/build.gradle.kts +++ b/authorizations/authorization-ranger/build.gradle.kts @@ -67,7 +67,12 @@ dependencies { exclude("net.java.dev.jna") exclude("javax.ws.rs") exclude("org.eclipse.jetty") + // Conflicts with hadoop-client-api used in hadoop-catalog. + exclude("org.apache.hadoop", "hadoop-common") } + implementation(libs.hadoop3.client.api) + implementation(libs.hadoop3.client.runtime) + implementation(libs.rome) compileOnly(libs.lombok) testRuntimeOnly(libs.junit.jupiter.engine) @@ -92,11 +97,7 @@ dependencies { testImplementation("org.apache.kyuubi:kyuubi-spark-authz-shaded_$scalaVersion:$kyuubiVersion") { exclude("com.sun.jersey") } - testImplementation(libs.hadoop3.client) - testImplementation(libs.hadoop3.common) { - exclude("com.sun.jersey") - exclude("javax.servlet", "servlet-api") - } + testImplementation(libs.hadoop3.hdfs) { exclude("com.sun.jersey") exclude("javax.servlet", "servlet-api") diff --git a/build.gradle.kts b/build.gradle.kts index c64997f3a90..154b4e7f776 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -779,7 +779,7 @@ tasks { !it.name.startsWith("client") && !it.name.startsWith("filesystem") && !it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name != "trino-connector" && it.name != "integration-test" && it.name != "bundled-catalog" && !it.name.startsWith("flink") && it.name != "integration-test" && it.name != "hive-metastore-common" && !it.name.startsWith("flink") && - it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle" && it.name != "hadoop-common" + it.parent?.name != "bundles" && it.name != "hadoop-common" ) { from(it.configurations.runtimeClasspath) into("distribution/package/libs") @@ -799,10 +799,8 @@ tasks { !it.name.startsWith("integration-test") && !it.name.startsWith("flink") && !it.name.startsWith("trino-connector") && - it.name != "bundled-catalog" && - it.name != 
"hive-metastore-common" && it.name != "gcp-bundle" && - it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle" && - it.name != "hadoop-common" && it.name != "docs" + it.name != "hive-metastore-common" && + it.name != "docs" && it.name != "hadoop-common" && it.parent?.name != "bundles" ) { dependsOn("${it.name}:build") from("${it.name}/build/libs") diff --git a/bundles/aliyun-bundle/build.gradle.kts b/bundles/aliyun-bundle/build.gradle.kts index bc2d21a6851..c8377285599 100644 --- a/bundles/aliyun-bundle/build.gradle.kts +++ b/bundles/aliyun-bundle/build.gradle.kts @@ -25,32 +25,12 @@ plugins { } dependencies { - compileOnly(project(":api")) - compileOnly(project(":core")) - compileOnly(project(":catalogs:catalog-common")) - compileOnly(project(":catalogs:catalog-hadoop")) - compileOnly(project(":catalogs:hadoop-common")) { - exclude("*") - } - compileOnly(libs.hadoop3.common) - - implementation(libs.aliyun.credentials.sdk) + implementation(project(":bundles:aliyun")) + implementation(libs.commons.collections3) + implementation(libs.hadoop3.client.api) + implementation(libs.hadoop3.client.runtime) implementation(libs.hadoop3.oss) - - // Aliyun oss SDK depends on this package, and JDK >= 9 requires manual add - // https://www.alibabacloud.com/help/en/oss/developer-reference/java-installation?spm=a2c63.p38356.0.i1 - implementation(libs.sun.activation) - - // oss needs StringUtils from commons-lang3 or the following error will occur in 3.3.0 - // java.lang.NoClassDefFoundError: org/apache/commons/lang3/StringUtils - // org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystemStore.initialize(AliyunOSSFileSystemStore.java:111) - // org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem.initialize(AliyunOSSFileSystem.java:323) - // org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3611) - implementation(libs.commons.lang3) - - implementation(project(":catalogs:catalog-common")) { - exclude("*") - } + implementation(libs.httpclient) } 
tasks.withType(ShadowJar::class.java) { @@ -60,8 +40,12 @@ tasks.withType(ShadowJar::class.java) { mergeServiceFiles() // Relocate dependencies to avoid conflicts - relocate("org.jdom", "org.apache.gravitino.shaded.org.jdom") - relocate("org.apache.commons.lang3", "org.apache.gravitino.shaded.org.apache.commons.lang3") + relocate("org.jdom", "org.apache.gravitino.aliyun.shaded.org.jdom") + relocate("org.apache.commons.lang3", "org.apache.gravitino.aliyun.shaded.org.apache.commons.lang3") + relocate("com.fasterxml.jackson", "org.apache.gravitino.aliyun.shaded.com.fasterxml.jackson") + relocate("com.google.common", "org.apache.gravitino.aliyun.shaded.com.google.common") + relocate("org.apache.http", "org.apache.gravitino.aliyun.shaded.org.apache.http") + relocate("org.apache.commons.collections", "org.apache.gravitino.aliyun.shaded.org.apache.commons.collections") } tasks.jar { diff --git a/bundles/aliyun/build.gradle.kts b/bundles/aliyun/build.gradle.kts new file mode 100644 index 00000000000..f4d38d40b92 --- /dev/null +++ b/bundles/aliyun/build.gradle.kts @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { + `maven-publish` + id("java") + alias(libs.plugins.shadow) +} + +dependencies { + compileOnly(project(":api")) + compileOnly(project(":catalogs:catalog-common")) + compileOnly(project(":catalogs:catalog-hadoop")) + compileOnly(project(":core")) + compileOnly(libs.hadoop3.client.api) + compileOnly(libs.hadoop3.client.runtime) + compileOnly(libs.hadoop3.oss) + + implementation(project(":catalogs:catalog-common")) { + exclude("*") + } + implementation(project(":catalogs:hadoop-common")) { + exclude("*") + } + + implementation(libs.aliyun.credentials.sdk) + implementation(libs.commons.collections3) + + // oss needs StringUtils from commons-lang3 or the following error will occur in 3.3.0 + // java.lang.NoClassDefFoundError: org/apache/commons/lang3/StringUtils + // org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystemStore.initialize(AliyunOSSFileSystemStore.java:111) + // org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem.initialize(AliyunOSSFileSystem.java:323) + // org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3611) + implementation(libs.commons.lang3) + implementation(libs.guava) + + implementation(libs.httpclient) + implementation(libs.jackson.databind) + implementation(libs.jackson.annotations) + implementation(libs.jackson.datatype.jdk8) + implementation(libs.jackson.datatype.jsr310) + + // Aliyun oss SDK depends on this package, and JDK >= 9 requires manual add + // https://www.alibabacloud.com/help/en/oss/developer-reference/java-installation?spm=a2c63.p38356.0.i1 + implementation(libs.sun.activation) +} + +tasks.withType(ShadowJar::class.java) { + isZip64 = true + configurations = listOf(project.configurations.runtimeClasspath.get()) + archiveClassifier.set("") + mergeServiceFiles() + + // Relocate dependencies to avoid conflicts + relocate("org.jdom", "org.apache.gravitino.aliyun.shaded.org.jdom") + relocate("org.apache.commons.lang3", 
"org.apache.gravitino.aliyun.shaded.org.apache.commons.lang3") + relocate("com.fasterxml.jackson", "org.apache.gravitino.aliyun.shaded.com.fasterxml.jackson") + relocate("com.google.common", "org.apache.gravitino.aliyun.shaded.com.google.common") + relocate("org.apache.http", "org.apache.gravitino.aliyun.shaded.org.apache.http") + relocate("org.apache.commons.collections", "org.apache.gravitino.aliyun.shaded.org.apache.commons.collections") +} + +tasks.jar { + dependsOn(tasks.named("shadowJar")) + archiveClassifier.set("empty") +} + +tasks.compileJava { + dependsOn(":catalogs:catalog-hadoop:runtimeJars") +} diff --git a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/OSSSecretKeyProvider.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSSecretKeyProvider.java similarity index 100% rename from bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/OSSSecretKeyProvider.java rename to bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSSecretKeyProvider.java diff --git a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java similarity index 100% rename from bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java rename to bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java diff --git a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Condition.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Condition.java similarity index 100% rename from bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Condition.java rename to bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Condition.java diff --git a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java 
b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java similarity index 100% rename from bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java rename to bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java diff --git a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Policy.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Policy.java similarity index 100% rename from bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Policy.java rename to bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Policy.java diff --git a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Statement.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Statement.java similarity index 100% rename from bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Statement.java rename to bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Statement.java diff --git a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/StringLike.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/StringLike.java similarity index 100% rename from bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/StringLike.java rename to bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/StringLike.java diff --git a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java similarity index 100% rename from bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java rename to bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java diff --git 
a/bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider b/bundles/aliyun/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider similarity index 100% rename from bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider rename to bundles/aliyun/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider diff --git a/bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider b/bundles/aliyun/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider similarity index 100% rename from bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider rename to bundles/aliyun/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider diff --git a/bundles/aws-bundle/build.gradle.kts b/bundles/aws-bundle/build.gradle.kts index 3af5c8b4f38..35b1e22a4f6 100644 --- a/bundles/aws-bundle/build.gradle.kts +++ b/bundles/aws-bundle/build.gradle.kts @@ -25,30 +25,20 @@ plugins { } dependencies { - compileOnly(project(":api")) - compileOnly(project(":core")) - compileOnly(project(":catalogs:catalog-common")) - compileOnly(project(":catalogs:catalog-hadoop")) - compileOnly(project(":catalogs:hadoop-common")) { - exclude("*") - } - compileOnly(libs.hadoop3.common) - - implementation(libs.aws.iam) - implementation(libs.aws.policy) - implementation(libs.aws.sts) - implementation(libs.commons.lang3) + implementation(project(":bundles:aws")) implementation(libs.hadoop3.aws) - implementation(project(":catalogs:catalog-common")) { - exclude("*") - } + implementation(libs.hadoop3.client.api) + implementation(libs.hadoop3.client.runtime) } tasks.withType(ShadowJar::class.java) { isZip64 = true configurations = 
listOf(project.configurations.runtimeClasspath.get()) - relocate("org.apache.commons", "org.apache.gravitino.aws.shaded.org.apache.commons") archiveClassifier.set("") + + relocate("org.apache.commons.lang3", "org.apache.gravitino.aws.shaded.org.apache.commons.lang3") + relocate("com.google.common", "org.apache.gravitino.aws.shaded.com.google.common") + relocate("com.fasterxml.jackson", "org.apache.gravitino.aws.shaded.com.fasterxml.jackson") } tasks.jar { diff --git a/bundles/aws/build.gradle.kts b/bundles/aws/build.gradle.kts new file mode 100644 index 00000000000..45fda5485d5 --- /dev/null +++ b/bundles/aws/build.gradle.kts @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { + `maven-publish` + id("java") + alias(libs.plugins.shadow) +} + +dependencies { + compileOnly(project(":api")) + compileOnly(project(":catalogs:catalog-common")) + compileOnly(project(":catalogs:catalog-hadoop")) + compileOnly(project(":core")) + compileOnly(libs.hadoop3.aws) + compileOnly(libs.hadoop3.client.api) + compileOnly(libs.hadoop3.client.runtime) + + implementation(project(":catalogs:catalog-common")) { + exclude("*") + } + implementation(project(":catalogs:hadoop-common")) { + exclude("*") + } + + implementation(libs.aws.iam) + implementation(libs.aws.policy) + implementation(libs.aws.sts) + implementation(libs.commons.lang3) + implementation(libs.hadoop3.aws) + implementation(libs.guava) +} + +tasks.withType(ShadowJar::class.java) { + isZip64 = true + configurations = listOf(project.configurations.runtimeClasspath.get()) + archiveClassifier.set("") + + relocate("org.apache.commons.lang3", "org.apache.gravitino.aws.shaded.org.apache.commons.lang3") + relocate("com.google.common", "org.apache.gravitino.aws.shaded.com.google.common") + relocate("com.fasterxml.jackson", "org.apache.gravitino.aws.shaded.com.fasterxml.jackson") +} + +tasks.jar { + dependsOn(tasks.named("shadowJar")) + archiveClassifier.set("empty") +} + +tasks.compileJava { + dependsOn(":catalogs:catalog-hadoop:runtimeJars") +} diff --git a/bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/credential/S3SecretKeyProvider.java b/bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3SecretKeyProvider.java similarity index 100% rename from bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/credential/S3SecretKeyProvider.java rename to bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3SecretKeyProvider.java diff --git a/bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java 
b/bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java similarity index 100% rename from bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java rename to bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java diff --git a/bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java b/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java similarity index 53% rename from bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java rename to bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java index 0d755c1f564..b7cd569bbf6 100644 --- a/bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java +++ b/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java @@ -19,9 +19,14 @@ package org.apache.gravitino.s3.fs; +import com.amazonaws.auth.AWSCredentialsProvider; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; import java.io.IOException; +import java.util.List; import java.util.Map; import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider; import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils; @@ -31,9 +36,13 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class S3FileSystemProvider implements FileSystemProvider { + private static final Logger LOGGER = LoggerFactory.getLogger(S3FileSystemProvider.class); + @VisibleForTesting public static final Map GRAVITINO_KEY_TO_S3_HADOOP_KEY = ImmutableMap.of( @@ -41,20 +50,67 @@ public class S3FileSystemProvider implements FileSystemProvider { 
S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, Constants.ACCESS_KEY, S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, Constants.SECRET_KEY); + // We can't use Constants.AWS_CREDENTIALS_PROVIDER directly, as in 2.7, this key does not exist. + private static final String S3_CREDENTIAL_KEY = "fs.s3a.aws.credentials.provider"; + private static final String S3_SIMPLE_CREDENTIAL = + "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"; + @Override public FileSystem getFileSystem(Path path, Map config) throws IOException { Configuration configuration = new Configuration(); Map hadoopConfMap = FileSystemUtils.toHadoopConfigMap(config, GRAVITINO_KEY_TO_S3_HADOOP_KEY); - if (!hadoopConfMap.containsKey(Constants.AWS_CREDENTIALS_PROVIDER)) { - configuration.set( - Constants.AWS_CREDENTIALS_PROVIDER, Constants.ASSUMED_ROLE_CREDENTIALS_DEFAULT); + if (!hadoopConfMap.containsKey(S3_CREDENTIAL_KEY)) { + hadoopConfMap.put(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL); } + hadoopConfMap.forEach(configuration::set); + + // Hadoop-aws 2 does not support IAMInstanceCredentialsProvider + checkAndSetCredentialProvider(configuration); + return S3AFileSystem.newInstance(path.toUri(), configuration); } + private void checkAndSetCredentialProvider(Configuration configuration) { + String provides = configuration.get(S3_CREDENTIAL_KEY); + if (provides == null) { + return; + } + + Splitter splitter = Splitter.on(',').trimResults().omitEmptyStrings(); + Joiner joiner = Joiner.on(",").skipNulls(); + // Split the list of providers + List providers = splitter.splitToList(provides); + List validProviders = Lists.newArrayList(); + + for (String provider : providers) { + try { + Class c = Class.forName(provider); + if (AWSCredentialsProvider.class.isAssignableFrom(c)) { + validProviders.add(provider); + } else { + LOGGER.warn( + "Credential provider {} is not a subclass of AWSCredentialsProvider, skipping", + provider); + } + } catch (Exception e) { + LOGGER.warn( + "Credential provider {} not found in the Hadoop 
runtime, falling back to default", + provider); + configuration.set(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL); + return; + } + } + + if (validProviders.isEmpty()) { + configuration.set(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL); + } else { + configuration.set(S3_CREDENTIAL_KEY, joiner.join(validProviders)); + } + } + /** * Get the scheme of the FileSystem. Attention, for S3 the schema is "s3a", not "s3". Users should * use "s3a://..." to access S3. diff --git a/bundles/aws-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider b/bundles/aws/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider similarity index 100% rename from bundles/aws-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider rename to bundles/aws/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider diff --git a/bundles/aws-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider b/bundles/aws/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider similarity index 100% rename from bundles/aws-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider rename to bundles/aws/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider diff --git a/bundles/azure-bundle/build.gradle.kts b/bundles/azure-bundle/build.gradle.kts index 9e4a4add54e..7d9e253ac8a 100644 --- a/bundles/azure-bundle/build.gradle.kts +++ b/bundles/azure-bundle/build.gradle.kts @@ -25,26 +25,10 @@ plugins { } dependencies { - compileOnly(project(":api")) - compileOnly(project(":core")) - compileOnly(project(":catalogs:catalog-common")) - compileOnly(project(":catalogs:catalog-hadoop")) - compileOnly(project(":catalogs:hadoop-common")) { - exclude("*") - } - - compileOnly(libs.hadoop3.common) - - 
implementation(libs.azure.identity) - implementation(libs.azure.storage.file.datalake) - - implementation(libs.commons.lang3) - // runtime used - implementation(libs.commons.logging) + implementation(project(":bundles:azure")) implementation(libs.hadoop3.abs) - implementation(project(":catalogs:catalog-common")) { - exclude("*") - } + implementation(libs.hadoop3.client.api) + implementation(libs.hadoop3.client.runtime) } tasks.withType(ShadowJar::class.java) { @@ -56,7 +40,8 @@ tasks.withType(ShadowJar::class.java) { relocate("org.apache.httpcomponents", "org.apache.gravitino.azure.shaded.org.apache.httpcomponents") relocate("org.apache.commons", "org.apache.gravitino.azure.shaded.org.apache.commons") relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml") - relocate("com.google.guava", "org.apache.gravitino.azure.shaded.com.google.guava") + relocate("com.google.common", "org.apache.gravitino.azure.shaded.com.google.common") + relocate("org.eclipse.jetty", "org.apache.gravitino.azure.shaded.org.eclipse.jetty") } tasks.jar { diff --git a/bundles/azure/build.gradle.kts b/bundles/azure/build.gradle.kts new file mode 100644 index 00000000000..59d8cf5f806 --- /dev/null +++ b/bundles/azure/build.gradle.kts @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { + `maven-publish` + id("java") + alias(libs.plugins.shadow) +} + +dependencies { + compileOnly(project(":api")) + compileOnly(project(":catalogs:catalog-hadoop")) + compileOnly(project(":core")) + + compileOnly(libs.hadoop3.abs) + compileOnly(libs.hadoop3.client.api) + compileOnly(libs.hadoop3.client.runtime) + + implementation(project(":catalogs:catalog-common")) { + exclude("*") + } + implementation(project(":catalogs:hadoop-common")) { + exclude("*") + } + + implementation(libs.azure.identity) + implementation(libs.azure.storage.file.datalake) + + implementation(libs.commons.lang3) + // runtime used + implementation(libs.commons.logging) + implementation(libs.guava) +} + +tasks.withType(ShadowJar::class.java) { + isZip64 = true + configurations = listOf(project.configurations.runtimeClasspath.get()) + archiveClassifier.set("") + + // Relocate dependencies to avoid conflicts + relocate("org.apache.httpcomponents", "org.apache.gravitino.azure.shaded.org.apache.httpcomponents") + relocate("org.apache.commons", "org.apache.gravitino.azure.shaded.org.apache.commons") + relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml") + relocate("com.google.common", "org.apache.gravitino.azure.shaded.com.google.common") + relocate("org.eclipse.jetty", "org.apache.gravitino.azure.shaded.org.eclipse.jetty") +} + +tasks.jar { + dependsOn(tasks.named("shadowJar")) + archiveClassifier.set("empty") +} + +tasks.compileJava { + dependsOn(":catalogs:catalog-hadoop:runtimeJars") +} diff --git a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/ADLSLocationUtils.java b/bundles/azure/src/main/java/org/apache/gravitino/abs/credential/ADLSLocationUtils.java similarity index 100% rename from 
bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/ADLSLocationUtils.java rename to bundles/azure/src/main/java/org/apache/gravitino/abs/credential/ADLSLocationUtils.java diff --git a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/ADLSTokenProvider.java b/bundles/azure/src/main/java/org/apache/gravitino/abs/credential/ADLSTokenProvider.java similarity index 100% rename from bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/ADLSTokenProvider.java rename to bundles/azure/src/main/java/org/apache/gravitino/abs/credential/ADLSTokenProvider.java diff --git a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/AzureAccountKeyProvider.java b/bundles/azure/src/main/java/org/apache/gravitino/abs/credential/AzureAccountKeyProvider.java similarity index 100% rename from bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/AzureAccountKeyProvider.java rename to bundles/azure/src/main/java/org/apache/gravitino/abs/credential/AzureAccountKeyProvider.java diff --git a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java b/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java similarity index 100% rename from bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java rename to bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java diff --git a/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider b/bundles/azure/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider similarity index 100% rename from bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider rename to bundles/azure/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider diff --git 
a/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider b/bundles/azure/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider similarity index 100% rename from bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider rename to bundles/azure/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider diff --git a/bundles/gcp-bundle/build.gradle.kts b/bundles/gcp-bundle/build.gradle.kts index bae7411c75e..73efaf9f22c 100644 --- a/bundles/gcp-bundle/build.gradle.kts +++ b/bundles/gcp-bundle/build.gradle.kts @@ -25,25 +25,10 @@ plugins { } dependencies { - compileOnly(project(":api")) - compileOnly(project(":core")) - compileOnly(project(":catalogs:catalog-common")) - compileOnly(project(":catalogs:catalog-hadoop")) - compileOnly(project(":catalogs:hadoop-common")) { - exclude("*") - } - - compileOnly(libs.hadoop3.common) - - implementation(libs.commons.lang3) - // runtime used - implementation(libs.commons.logging) + implementation(project(":bundles:gcp")) + implementation(libs.hadoop3.client.api) + implementation(libs.hadoop3.client.runtime) implementation(libs.hadoop3.gcs) - implementation(project(":catalogs:catalog-common")) { - exclude("*") - } - implementation(libs.google.auth.http) - implementation(libs.google.auth.credentials) } tasks.withType(ShadowJar::class.java) { @@ -54,8 +39,9 @@ tasks.withType(ShadowJar::class.java) { // Relocate dependencies to avoid conflicts relocate("org.apache.httpcomponents", "org.apache.gravitino.gcp.shaded.org.apache.httpcomponents") relocate("org.apache.commons", "org.apache.gravitino.gcp.shaded.org.apache.commons") - relocate("com.google", "org.apache.gravitino.gcp.shaded.com.google") + relocate("com.google.common", "org.apache.gravitino.gcp.shaded.com.google.common") relocate("com.fasterxml", "org.apache.gravitino.gcp.shaded.com.fasterxml") + 
relocate("org.eclipse.jetty", "org.apache.gravitino.gcp.shaded.org.eclipse.jetty") } tasks.jar { diff --git a/bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/bundles/gcp-bundle/src/main/resources/org.apache.hadoop.fs.FileSystem similarity index 100% rename from bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem rename to bundles/gcp-bundle/src/main/resources/org.apache.hadoop.fs.FileSystem diff --git a/bundles/gcp/build.gradle.kts b/bundles/gcp/build.gradle.kts new file mode 100644 index 00000000000..6f21dc3d5af --- /dev/null +++ b/bundles/gcp/build.gradle.kts @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { + `maven-publish` + id("java") + alias(libs.plugins.shadow) +} + +dependencies { + compileOnly(project(":api")) + compileOnly(project(":catalogs:catalog-common")) + compileOnly(project(":catalogs:catalog-hadoop")) + compileOnly(project(":core")) + + compileOnly(libs.hadoop3.client.api) + compileOnly(libs.hadoop3.client.runtime) + + implementation(project(":catalogs:catalog-common")) { + exclude("*") + } + implementation(project(":catalogs:hadoop-common")) { + exclude("*") + } + implementation(libs.commons.lang3) + // runtime used + implementation(libs.commons.logging) + implementation(libs.google.auth.credentials) + implementation(libs.google.auth.http) +} + +tasks.withType(ShadowJar::class.java) { + isZip64 = true + configurations = listOf(project.configurations.runtimeClasspath.get()) + archiveClassifier.set("") + + // Relocate dependencies to avoid conflicts + relocate("org.apache.httpcomponents", "org.apache.gravitino.gcp.shaded.org.apache.httpcomponents") + relocate("org.apache.commons", "org.apache.gravitino.gcp.shaded.org.apache.commons") + relocate("com.google.common", "org.apache.gravitino.gcp.shaded.com.google.common") + relocate("com.fasterxml", "org.apache.gravitino.gcp.shaded.com.fasterxml") + relocate("com.fasterxml.jackson", "org.apache.gravitino.gcp.shaded.com.fasterxml.jackson") +} + +tasks.jar { + dependsOn(tasks.named("shadowJar")) + archiveClassifier.set("empty") +} + +tasks.compileJava { + dependsOn(":catalogs:catalog-hadoop:runtimeJars") +} diff --git a/bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java similarity index 100% rename from bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java rename to bundles/gcp/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java diff --git 
a/bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java similarity index 85% rename from bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java rename to bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java index a07ff3d6ece..0055e167c49 100644 --- a/bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java +++ b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java @@ -18,7 +18,6 @@ */ package org.apache.gravitino.gcs.fs; -import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import java.io.IOException; @@ -29,11 +28,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class GCSFileSystemProvider implements FileSystemProvider { - private static final Logger LOGGER = LoggerFactory.getLogger(GCSFileSystemProvider.class); private static final String GCS_SERVICE_ACCOUNT_JSON_FILE = "fs.gs.auth.service.account.json.keyfile"; @@ -46,8 +42,7 @@ public FileSystem getFileSystem(Path path, Map config) throws IO Configuration configuration = new Configuration(); FileSystemUtils.toHadoopConfigMap(config, GRAVITINO_KEY_TO_GCS_HADOOP_KEY) .forEach(configuration::set); - LOGGER.info("Creating GCS file system with config: {}", config); - return GoogleHadoopFileSystem.newInstance(path.toUri(), configuration); + return FileSystem.newInstance(path.toUri(), configuration); } @Override diff --git a/bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider 
b/bundles/gcp/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider similarity index 100% rename from bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider rename to bundles/gcp/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider diff --git a/bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider b/bundles/gcp/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider similarity index 100% rename from bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider rename to bundles/gcp/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/hadoop/Constants.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/hadoop/Constants.java new file mode 100644 index 00000000000..468728362bb --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/hadoop/Constants.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.catalog.hadoop; + +public class Constants { + + public static final String BUILTIN_LOCAL_FS_PROVIDER = "builtin-local"; + public static final String BUILTIN_HDFS_FS_PROVIDER = "builtin-hdfs"; +} diff --git a/catalogs/catalog-hadoop/build.gradle.kts b/catalogs/catalog-hadoop/build.gradle.kts index 8873b795046..d599a5e72f1 100644 --- a/catalogs/catalog-hadoop/build.gradle.kts +++ b/catalogs/catalog-hadoop/build.gradle.kts @@ -28,43 +28,22 @@ dependencies { implementation(project(":api")) { exclude(group = "*") } - - implementation(project(":core")) { - exclude(group = "*") - } - - implementation(project(":common")) { - exclude(group = "*") - } - implementation(project(":catalogs:catalog-common")) { exclude(group = "*") } - implementation(project(":catalogs:hadoop-common")) { exclude(group = "*") } - - implementation(libs.hadoop3.common) { - exclude("com.sun.jersey") - exclude("javax.servlet", "servlet-api") - exclude("org.eclipse.jetty", "*") - exclude("org.apache.hadoop", "hadoop-auth") - exclude("org.apache.curator", "curator-client") - exclude("org.apache.curator", "curator-framework") - exclude("org.apache.curator", "curator-recipes") - exclude("org.apache.avro", "avro") - exclude("com.sun.jersey", "jersey-servlet") + implementation(project(":common")) { + exclude(group = "*") } - - implementation(libs.hadoop3.client) { - exclude("org.apache.hadoop", "hadoop-mapreduce-client-core") - exclude("org.apache.hadoop", "hadoop-mapreduce-client-jobclient") - exclude("org.apache.hadoop", "hadoop-yarn-api") - exclude("org.apache.hadoop", "hadoop-yarn-client") - exclude("com.squareup.okhttp", "okhttp") + implementation(project(":core")) { + exclude(group = "*") } - + implementation(libs.commons.lang3) + implementation(libs.commons.io) + implementation(libs.hadoop3.client.api) + implementation(libs.hadoop3.client.runtime) 
implementation(libs.hadoop3.hdfs) { exclude("com.sun.jersey") exclude("javax.servlet", "servlet-api") @@ -74,20 +53,18 @@ dependencies { exclude("io.netty") exclude("org.fusesource.leveldbjni") } - implementation(libs.slf4j.api) compileOnly(libs.guava) - testImplementation(project(":bundles:aws-bundle")) - testImplementation(project(":bundles:gcp-bundle")) - testImplementation(project(":bundles:aliyun-bundle")) - testImplementation(project(":bundles:azure-bundle")) testImplementation(project(":clients:client-java")) + testImplementation(project(":bundles:aws-bundle", configuration = "shadow")) + testImplementation(project(":bundles:gcp-bundle", configuration = "shadow")) + testImplementation(project(":bundles:aliyun-bundle", configuration = "shadow")) + testImplementation(project(":bundles:azure-bundle", configuration = "shadow")) testImplementation(project(":integration-test-common", "testArtifacts")) testImplementation(project(":server")) testImplementation(project(":server-common")) - testImplementation(libs.bundles.log4j) testImplementation(libs.hadoop3.gcs) testImplementation(libs.hadoop3.minicluster) diff --git a/catalogs/catalog-hive/build.gradle.kts b/catalogs/catalog-hive/build.gradle.kts index b471fccead1..6a8b815ab97 100644 --- a/catalogs/catalog-hive/build.gradle.kts +++ b/catalogs/catalog-hive/build.gradle.kts @@ -96,6 +96,9 @@ dependencies { testImplementation(project(":integration-test-common", "testArtifacts")) testImplementation(project(":server")) testImplementation(project(":server-common")) + testImplementation(project(":catalogs:hadoop-common")) { + exclude("*") + } testImplementation(libs.bundles.jetty) testImplementation(libs.bundles.jersey) diff --git a/catalogs/hadoop-common/build.gradle.kts b/catalogs/hadoop-common/build.gradle.kts index ab768cb1f11..566ce5986e3 100644 --- a/catalogs/hadoop-common/build.gradle.kts +++ b/catalogs/hadoop-common/build.gradle.kts @@ -23,6 +23,9 @@ plugins { // try to avoid adding extra dependencies because it 
is used by catalogs and connectors. dependencies { + implementation(project(":catalogs:catalog-common")) implementation(libs.commons.lang3) - implementation(libs.hadoop3.common) + implementation(libs.hadoop3.client.api) + implementation(libs.hadoop3.client.runtime) + implementation(libs.guava) } diff --git a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java similarity index 95% rename from catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java rename to catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java index 129a8e88274..a1434e85c3e 100644 --- a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java +++ b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java @@ -18,8 +18,8 @@ */ package org.apache.gravitino.catalog.hadoop.fs; -import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.BUILTIN_HDFS_FS_PROVIDER; -import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.BUILTIN_LOCAL_FS_PROVIDER; +import static org.apache.gravitino.catalog.hadoop.Constants.BUILTIN_HDFS_FS_PROVIDER; +import static org.apache.gravitino.catalog.hadoop.Constants.BUILTIN_LOCAL_FS_PROVIDER; import static org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider.GRAVITINO_BYPASS; import com.google.common.collect.Maps; @@ -45,7 +45,7 @@ public static Map getFileSystemProviders(String file fileSystemProviders != null ? Arrays.stream(fileSystemProviders.split(",")) .map(f -> f.trim().toLowerCase(Locale.ROOT)) - .collect(java.util.stream.Collectors.toSet()) + .collect(Collectors.toSet()) : Sets.newHashSet(); // Add built-in file system providers to the use list automatically. 
diff --git a/clients/filesystem-hadoop3-runtime/build.gradle.kts b/clients/filesystem-hadoop3-runtime/build.gradle.kts index 8081a55604e..db439a4981e 100644 --- a/clients/filesystem-hadoop3-runtime/build.gradle.kts +++ b/clients/filesystem-hadoop3-runtime/build.gradle.kts @@ -28,6 +28,7 @@ plugins { dependencies { implementation(project(":clients:filesystem-hadoop3")) implementation(project(":clients:client-java-runtime", configuration = "shadow")) + implementation(libs.commons.lang3) } tasks.withType(ShadowJar::class.java) { @@ -38,6 +39,8 @@ tasks.withType(ShadowJar::class.java) { // Relocate dependencies to avoid conflicts relocate("com.google", "org.apache.gravitino.shaded.com.google") relocate("com.github.benmanes.caffeine", "org.apache.gravitino.shaded.com.github.benmanes.caffeine") + // relocate common lang3 package + relocate("org.apache.commons.lang3", "org.apache.gravitino.shaded.org.apache.commons.lang3") } tasks.jar { diff --git a/clients/filesystem-hadoop3/build.gradle.kts b/clients/filesystem-hadoop3/build.gradle.kts index d24eb4efdf2..424f6a11406 100644 --- a/clients/filesystem-hadoop3/build.gradle.kts +++ b/clients/filesystem-hadoop3/build.gradle.kts @@ -25,7 +25,8 @@ plugins { dependencies { compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) - compileOnly(libs.hadoop3.common) + compileOnly(libs.hadoop3.client.api) + compileOnly(libs.hadoop3.client.runtime) implementation(project(":catalogs:catalog-common")) { exclude(group = "*") @@ -35,32 +36,31 @@ dependencies { } implementation(libs.caffeine) + implementation(libs.guava) + implementation(libs.commons.lang3) testImplementation(project(":api")) testImplementation(project(":core")) + testImplementation(project(":catalogs:catalog-hadoop")) testImplementation(project(":common")) testImplementation(project(":server")) testImplementation(project(":server-common")) testImplementation(project(":clients:client-java")) testImplementation(project(":integration-test-common", 
"testArtifacts")) - testImplementation(project(":catalogs:catalog-hadoop")) - testImplementation(project(":bundles:gcp-bundle")) - testImplementation(project(":bundles:aliyun-bundle")) - testImplementation(project(":bundles:aws-bundle")) - testImplementation(project(":bundles:azure-bundle")) - testImplementation(project(":bundles:gcp-bundle")) + + testImplementation(project(":bundles:aws-bundle", configuration = "shadow")) + testImplementation(project(":bundles:gcp-bundle", configuration = "shadow")) + testImplementation(project(":bundles:aliyun-bundle", configuration = "shadow")) + testImplementation(project(":bundles:azure-bundle", configuration = "shadow")) testImplementation(libs.awaitility) testImplementation(libs.bundles.jetty) testImplementation(libs.bundles.jersey) testImplementation(libs.bundles.jwt) - testImplementation(libs.guava) - testImplementation(libs.hadoop3.client) - testImplementation(libs.hadoop3.common) { - exclude("com.sun.jersey") - exclude("javax.servlet", "servlet-api") - } + testImplementation(libs.hadoop3.client.api) + testImplementation(libs.hadoop3.client.runtime) + testImplementation(libs.hadoop3.hdfs) { exclude("com.sun.jersey") exclude("javax.servlet", "servlet-api") diff --git a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java index e18e376b46c..a9c40e55840 100644 --- a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java +++ b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java @@ -40,6 +40,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.reflect.FieldUtils; import org.apache.gravitino.NameIdentifier; import 
org.apache.gravitino.audit.CallerContext; import org.apache.gravitino.audit.FilesetAuditConstants; @@ -392,6 +393,11 @@ private FilesetContextPair getFilesetContext(Path virtualPath, FilesetDataOperat scheme, GravitinoVirtualFileSystemConfiguration.GVFS_SCHEME); } + // Reset the FileSystem service loader to make sure the FileSystem will reload the + // service file systems, this is a temporary solution to fix the issue + // https://github.com/apache/gravitino/issues/5609 + resetFileSystemServiceLoader(scheme); + Map<String, String> maps = getConfigMap(getConf()); return provider.getFileSystem(filePath, maps); } catch (IOException ioe) { @@ -404,6 +410,24 @@ private FilesetContextPair getFilesetContext(Path virtualPath, FilesetDataOperat return new FilesetContextPair(new Path(actualFileLocation), fs); } + private void resetFileSystemServiceLoader(String fsScheme) { + try { + Map<String, Class<? extends FileSystem>> serviceFileSystems = + (Map<String, Class<? extends FileSystem>>) + FieldUtils.getField(FileSystem.class, "SERVICE_FILE_SYSTEMS", true).get(null); + + if (serviceFileSystems.containsKey(fsScheme)) { + return; + } + + // Set this value to false so that FileSystem will reload the service file systems when + // needed. + FieldUtils.getField(FileSystem.class, "FILE_SYSTEMS_LOADED", true).set(null, false); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + private Map<String, String> getConfigMap(Configuration configuration) { Map<String, String> maps = Maps.newHashMap(); configuration.forEach(entry -> maps.put(entry.getKey(), entry.getValue())); diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index ce58826cb93..9048556ffa5 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -10,10 +10,8 @@ license: "This software is licensed under the Apache License version 2." Hadoop catalog is a fileset catalog that using Hadoop Compatible File System (HCFS) to manage the storage location of the fileset. Currently, it supports local filesystem and HDFS. 
For -object storage like S3, GCS, and Azure Blob Storage, you can put the hadoop object store jar like -hadoop-aws into the `$GRAVITINO_HOME/catalogs/hadoop/libs` directory to enable the support. -Gravitino itself hasn't yet tested the object storage support, so if you have any issue, -please create an [issue](https://github.com/apache/gravitino/issues). +object storage like S3, GCS, Azure Blob Storage and OSS, you can put the hadoop object store jar like +`gravitino-aws-bundle-{gravitino-version}.jar` into the `$GRAVITINO_HOME/catalogs/hadoop/libs` directory to enable the support. Note that Gravitino uses Hadoop 3 dependencies to build Hadoop catalog. Theoretically, it should be compatible with both Hadoop 2.x and 3.x, since Gravitino doesn't leverage any new features in @@ -52,7 +50,7 @@ Apart from the above properties, to access fileset like HDFS, S3, GCS, OSS or cu | `s3-access-key-id` | The access key of the AWS S3. | (none) | Yes if it's a S3 fileset. | 0.7.0-incubating | | `s3-secret-access-key` | The secret key of the AWS S3. | (none) | Yes if it's a S3 fileset. | 0.7.0-incubating | -At the same time, you need to place the corresponding bundle jar [`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aws-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. +At the same time, you need to place the corresponding bundle jar [`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. #### GCS fileset @@ -62,7 +60,7 @@ At the same time, you need to place the corresponding bundle jar [`gravitino-aws | `default-filesystem-provider` | The name default filesystem providers of this Hadoop catalog if users do not specify the scheme in the URI. Default value is `builtin-local`, for GCS, if we set this value, we can omit the prefix 'gs://' in the location. 
| `builtin-local` | No | 0.7.0-incubating | | `gcs-service-account-file` | The path of GCS service account JSON file. | (none) | Yes if it's a GCS fileset. | 0.7.0-incubating | -In the meantime, you need to place the corresponding bundle jar [`gravitino-gcp-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gcp-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. +In the meantime, you need to place the corresponding bundle jar [`gravitino-gcp-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-gcp-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. #### OSS fileset @@ -74,7 +72,7 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-gcp- | `oss-access-key-id` | The access key of the Aliyun OSS. | (none) | Yes if it's a OSS fileset. | 0.7.0-incubating | | `oss-secret-access-key` | The secret key of the Aliyun OSS. | (none) | Yes if it's a OSS fileset. | 0.7.0-incubating | -In the meantime, you need to place the corresponding bundle jar [`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. +In the meantime, you need to place the corresponding bundle jar [`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aliyun-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. #### Azure Blob Storage fileset @@ -86,7 +84,7 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-aliy | `azure-storage-account-name ` | The account name of Azure Blob Storage. | (none) | Yes if it's a Azure Blob Storage fileset. | 0.8.0-incubating | | `azure-storage-account-key` | The account key of Azure Blob Storage. | (none) | Yes if it's a Azure Blob Storage fileset. 
| 0.8.0-incubating | -Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. +Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-azure-bundle/) in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`. :::note - Gravitino contains builtin file system providers for local file system(`builtin-local`) and HDFS(`builtin-hdfs`), that is to say if `filesystem-providers` is not set, Gravitino will still support local file system and HDFS. Apart from that, you can set the `filesystem-providers` to support other file systems like S3, GCS, OSS or custom file system. diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md index 162d535be11..0dbfd867a3d 100644 --- a/docs/how-to-use-gvfs.md +++ b/docs/how-to-use-gvfs.md @@ -77,7 +77,9 @@ Apart from the above properties, to access fileset like S3, GCS, OSS and custom | `s3-access-key-id` | The access key of the AWS S3. | (none) | Yes if it's a S3 fileset.| 0.7.0-incubating | | `s3-secret-access-key` | The secret key of the AWS S3. | (none) | Yes if it's a S3 fileset.| 0.7.0-incubating | -At the same time, you need to place the corresponding bundle jar [`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aws-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). +At the same time, you need to add the corresponding bundle jar +1. [`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws-bundle/) in the classpath if no hadoop environment is available, or +2. 
[`gravitino-aws-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws/) and hadoop-aws jar and other necessary dependencies in the classpath. #### GCS fileset @@ -86,7 +88,9 @@ At the same time, you need to place the corresponding bundle jar [`gravitino-aws |--------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|---------------------------|------------------| | `gcs-service-account-file` | The path of GCS service account JSON file. | (none) | Yes if it's a GCS fileset.| 0.7.0-incubating | -In the meantime, you need to place the corresponding bundle jar [`gravitino-gcp-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gcp-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). +In the meantime, you need to add the corresponding bundle jar +1. [`gravitino-gcp-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-gcp-bundle/) in the classpath if no hadoop environment is available, or +2. [`gravitino-gcp-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-gcp/) and [gcs-connector jar](https://github.com/GoogleCloudDataproc/hadoop-connectors/releases) and other necessary dependencies in the classpath. #### OSS fileset @@ -97,7 +101,10 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-gcp- | `oss-access-key-id` | The access key of the Aliyun OSS. | (none) | Yes if it's a OSS fileset.| 0.7.0-incubating | | `oss-secret-access-key` | The secret key of the Aliyun OSS. 
| (none) | Yes if it's a OSS fileset.| 0.7.0-incubating | -In the meantime, you need to place the corresponding bundle jar [`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). + +In the meantime, you need to place the corresponding bundle jar +1. [`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aliyun-bundle/) in the classpath if no hadoop environment is available, or +2. [`gravitino-aliyun-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aliyun/) and hadoop-aliyun jar and other necessary dependencies in the classpath. #### Azure Blob Storage fileset @@ -106,7 +113,9 @@ In the meantime, you need to place the corresponding bundle jar [`gravitino-aliy | `azure-storage-account-name` | The account name of Azure Blob Storage. | (none) | Yes if it's a Azure Blob Storage fileset. | 0.8.0-incubating | | `azure-storage-account-key` | The account key of Azure Blob Storage. | (none) | Yes if it's a Azure Blob Storage fileset. | 0.8.0-incubating | -Similar to the above, you need to place the corresponding bundle jar [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`). +Similar to the above, you need to place the corresponding bundle jar +1. [`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-azure-bundle/) in the classpath if no hadoop environment is available, or +2. [`gravitino-azure-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-azure/) and hadoop-azure jar and other necessary dependencies in the classpath. 
#### Custom fileset Since 0.7.0-incubating, users can define their own fileset type and configure the corresponding properties, for more, please refer to [Custom Fileset](./hadoop-catalog.md#how-to-custom-your-own-hcfs-file-system-fileset). @@ -137,8 +146,13 @@ You can configure these properties in two ways: ``` :::note -If you want to access the S3, GCS, OSS or custom fileset through GVFS, apart from the above properties, you need to place the corresponding bundle jar in the Hadoop environment. -For example if you want to access the S3 fileset, you need to place the S3 bundle jar [`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aws-bundle/) in the Hadoop environment(typically located in `${HADOOP_HOME}/share/hadoop/common/lib/`) or add it to the classpath. +If you want to access the S3, GCS, OSS or custom fileset through GVFS, apart from the above properties, you need to place the corresponding bundle jars in the Hadoop environment. +For example, if you want to access the S3 fileset, you need to place +1. The aws hadoop bundle jar [`gravitino-aws-bundle-${gravitino-version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws-bundle/) +2. or [`gravitino-aws-${gravitino-version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws/), and hadoop-aws jar and other necessary dependencies + +in the classpath, which is typically located at `${HADOOP_HOME}/share/hadoop/common/lib/`. + ::: 2. Configure the properties in the `core-site.xml` file of the Hadoop environment: @@ -212,6 +226,12 @@ cp gravitino-filesystem-hadoop3-runtime-{version}.jar ${HADOOP_HOME}/share/hadoo # You need to ensure that the Kerberos has permission on the HDFS directory. kinit -kt your_kerberos.keytab your_kerberos@xxx.com + +# 4. 
Copy other dependencies to the Hadoop environment if you want to access the S3 fileset via GVFS +cp bundles/aws-bundle/build/libs/gravitino-aws-bundle-{version}.jar ${HADOOP_HOME}/share/hadoop/common/lib/ +cp clients/filesystem-hadoop3-runtime/build/libs/gravitino-filesystem-hadoop3-runtime-{version}-SNAPSHOT.jar ${HADOOP_HOME}/share/hadoop/common/lib/ +cp ${HADOOP_HOME}/share/hadoop/tools/lib/* ${HADOOP_HOME}/share/hadoop/common/lib/ + # 4. Try to list the fileset ./${HADOOP_HOME}/bin/hadoop dfs -ls gvfs://fileset/test_catalog/test_schema/test_fileset_1 ``` @@ -222,6 +242,36 @@ You can also perform operations on the files or directories managed by fileset t Make sure that your code is using the correct Hadoop environment, and that your environment has the `gravitino-filesystem-hadoop3-runtime-{version}.jar` dependency. +```xml + + <dependency> + <groupId>org.apache.gravitino</groupId> + <artifactId>filesystem-hadoop3-runtime</artifactId> + <version>{gravitino-version}</version> + </dependency> + + <!-- Use the following one if there is no Hadoop environment --> + <dependency> + <groupId>org.apache.gravitino</groupId> + <artifactId>gravitino-aws-bundle</artifactId> + <version>{gravitino-version}</version> + </dependency> + + <!-- Use the following ones if a Hadoop environment is already available --> + <dependency> + <groupId>org.apache.gravitino</groupId> + <artifactId>gravitino-aws</artifactId> + <version>{gravitino-version}</version> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-aws</artifactId> + <version>{hadoop-version}</version> + </dependency> + +``` + For example: ```java @@ -462,8 +512,7 @@ from gravitino import gvfs options = { "cache_size": 20, "cache_expired_time": 3600, - "auth_type": "simple" - + "auth_type": "simple", # Optional, the following properties are required if you want to access the S3 fileset via GVFS python client, for GCS and OSS fileset, you should set the corresponding properties. 
"s3_endpoint": "http://localhost:9000", "s3_access_key_id": "minio", diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index a33c300ee88..52bccd9b480 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -36,12 +36,13 @@ airlift-json = "237" airlift-resolver = "1.6" hive2 = "2.3.9" hadoop2 = "2.10.2" -hadoop3 = "3.3.0" +hadoop3 = "3.3.1" hadoop3-gcs = "1.9.4-hadoop3" -hadoop3-abs = "3.3.0" -hadoop3-aliyun = "3.3.0" -hadoop-minikdc = "3.3.0" +hadoop3-abs = "3.3.1" +hadoop3-aliyun = "3.3.1" +hadoop-minikdc = "3.3.1" htrace-core4 = "4.1.0-incubating" +httpclient = "4.4.1" httpclient5 = "5.2.1" mockserver = "5.15.0" commons-csv = "1.12.0" @@ -177,6 +178,8 @@ hadoop3-aws = { group = "org.apache.hadoop", name = "hadoop-aws", version.ref = hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref = "hadoop3" } hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop3"} hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client", version.ref = "hadoop3"} +hadoop3-client-api = { group = "org.apache.hadoop", name = "hadoop-client-api", version.ref = "hadoop3"} +hadoop3-client-runtime = { group = "org.apache.hadoop", name = "hadoop-client-runtime", version.ref = "hadoop3"} hadoop3-minicluster = { group = "org.apache.hadoop", name = "hadoop-minicluster", version.ref = "hadoop-minikdc"} hadoop3-gcs = { group = "com.google.cloud.bigdataoss", name = "gcs-connector", version.ref = "hadoop3-gcs"} hadoop3-oss = { group = "org.apache.hadoop", name = "hadoop-aliyun", version.ref = "hadoop3-aliyun"} @@ -184,6 +187,7 @@ hadoop3-abs = { group = "org.apache.hadoop", name = "hadoop-azure", version.ref htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4", version.ref = "htrace-core4" } airlift-json = { group = "io.airlift", name = "json", version.ref = "airlift-json"} airlift-resolver = { group = "io.airlift.resolver", name = "resolver", version.ref = "airlift-resolver"} 
+httpclient = { group = "org.apache.httpcomponents", name = "httpclient", version.ref = "httpclient" } httpclient5 = { group = "org.apache.httpcomponents.client5", name = "httpclient5", version.ref = "httpclient5" } mockserver-netty = { group = "org.mock-server", name = "mockserver-netty", version.ref = "mockserver" } mockserver-client-java = { group = "org.mock-server", name = "mockserver-client-java", version.ref = "mockserver" } diff --git a/iceberg/iceberg-rest-server/build.gradle.kts b/iceberg/iceberg-rest-server/build.gradle.kts index 03fe32c92a9..fe35c4e7789 100644 --- a/iceberg/iceberg-rest-server/build.gradle.kts +++ b/iceberg/iceberg-rest-server/build.gradle.kts @@ -62,10 +62,10 @@ dependencies { annotationProcessor(libs.lombok) compileOnly(libs.lombok) - testImplementation(project(":bundles:aliyun-bundle")) - testImplementation(project(":bundles:aws-bundle")) - testImplementation(project(":bundles:gcp-bundle", configuration = "shadow")) - testImplementation(project(":bundles:azure-bundle")) + testImplementation(project(":bundles:aliyun")) + testImplementation(project(":bundles:aws")) + testImplementation(project(":bundles:gcp", configuration = "shadow")) + testImplementation(project(":bundles:azure", configuration = "shadow")) testImplementation(project(":integration-test-common", "testArtifacts")) testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") diff --git a/integration-test-common/build.gradle.kts b/integration-test-common/build.gradle.kts index 283169a76a9..bd15dc2a34f 100644 --- a/integration-test-common/build.gradle.kts +++ b/integration-test-common/build.gradle.kts @@ -53,11 +53,11 @@ dependencies { exclude("org.elasticsearch") exclude("org.elasticsearch.client") exclude("org.elasticsearch.plugin") + exclude("org.apache.hadoop", "hadoop-common") } - testImplementation(libs.hadoop3.common) { - exclude("com.sun.jersey") - exclude("javax.servlet", "servlet-api") - } + 
testImplementation(libs.hadoop3.client.api) + testImplementation(libs.hadoop3.client.runtime) + testImplementation(platform("org.junit:junit-bom:5.9.1")) testImplementation("org.junit.jupiter:junit-jupiter") } diff --git a/settings.gradle.kts b/settings.gradle.kts index 562614764b3..c865e14e7a2 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -77,8 +77,8 @@ project(":spark-connector:spark-runtime-3.5").projectDir = file("spark-connector include("web:web", "web:integration-test") include("docs") include("integration-test-common") -include(":bundles:aws-bundle") -include(":bundles:gcp-bundle") -include(":bundles:aliyun-bundle") -include(":bundles:azure-bundle") -include("catalogs:hadoop-common") +include(":bundles:aws", ":bundles:aws-bundle") +include(":bundles:gcp", ":bundles:gcp-bundle") +include(":bundles:aliyun", ":bundles:aliyun-bundle") +include(":bundles:azure", ":bundles:azure-bundle") +include(":catalogs:hadoop-common")