diff --git a/.github/workflows/broken_links_checker.yml b/.github/workflows/broken_links_checker.yml
index f2079ec3..82ec1cd5 100644
--- a/.github/workflows/broken_links_checker.yml
+++ b/.github/workflows/broken_links_checker.yml
@@ -15,7 +15,7 @@ jobs:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Configure broken links checker
run: |
mkdir -p ./target
@@ -27,6 +27,6 @@ jobs:
']}' > ./target/broken_links_checker.json
- uses: gaurav-nelson/github-action-markdown-link-check@v1
with:
- use-quiet-mode: 'yes'
- use-verbose-mode: 'yes'
+ use-quiet-mode: "yes"
+ use-verbose-mode: "yes"
config-file: ./target/broken_links_checker.json
diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml
index 8ecf1525..ec68c37a 100644
--- a/.github/workflows/ci-build.yml
+++ b/.github/workflows/ci-build.yml
@@ -24,11 +24,13 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 0
- - name: Set up JDK 11
+ - name: Set up JDK 11 & 17
uses: actions/setup-java@v3
with:
distribution: 'temurin'
- java-version: 11
+ java-version: |
+ 17
+ 11
cache: 'maven'
- name: Cache SonarCloud packages
uses: actions/cache@v3
@@ -42,7 +44,7 @@ jobs:
run: docker pull exasol/docker-db:${{ matrix.exasol-docker-version }}
- name: Run tests and build with Maven
run: |
- mvn --batch-mode verify ${{ matrix.profile }} \
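+          # Build and test on JDK 11 (setup-java exposes it as JAVA_HOME_11_X64)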
+ JAVA_HOME=$JAVA_HOME_11_X64 mvn --batch-mode verify ${{ matrix.profile }} \
-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \
-DtrimStackTrace=false
env:
@@ -55,7 +57,7 @@ jobs:
- name: Sonar analysis
if: ${{ env.SONAR_TOKEN != null && matrix.profile == '-Pspark3.4' }}
run: |
- mvn --batch-mode org.sonarsource.scanner.maven:sonar-maven-plugin:sonar \
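+          # Run the Sonar scanner on JDK 17 (setup-java exposes it as JAVA_HOME_17_X64)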
+ JAVA_HOME=$JAVA_HOME_17_X64 mvn --batch-mode org.sonarsource.scanner.maven:sonar-maven-plugin:sonar \
-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \
-DtrimStackTrace=false \
-Dsonar.organization=exasol \
diff --git a/.github/workflows/pk-verify.yml b/.github/workflows/pk-verify.yml
index 5f2d1e04..03935652 100644
--- a/.github/workflows/pk-verify.yml
+++ b/.github/workflows/pk-verify.yml
@@ -32,4 +32,4 @@ jobs:
key: ${{ runner.os }}-sonar
restore-keys: ${{ runner.os }}-sonar
- name: Run Project Keeper Separately
- run: mvn --batch-mode -DtrimStackTrace=false com.exasol:project-keeper-maven-plugin:2.9.11:verify --projects .
+ run: mvn --batch-mode -DtrimStackTrace=false com.exasol:project-keeper-maven-plugin:2.9.12:verify --projects .
diff --git a/dependencies.md b/dependencies.md
index f5b09445..37c92aba 100644
--- a/dependencies.md
+++ b/dependencies.md
@@ -45,6 +45,7 @@
| [jersey-core-server][47] | [EPL 2.0][37]; [The GNU General Public License (GPL), Version 2, With Classpath Exception][38]; [Apache License, 2.0][12]; [Modified BSD][43] |
| [jersey-core-client][48] | [EPL 2.0][37]; [GPL2 w/ CPE][38]; [EDL 1.0][41]; [BSD 2-Clause][42]; [Apache License, 2.0][12]; [Public Domain][39]; [Modified BSD][43]; [jQuery license][44]; [MIT license][45]; [W3C license][46] |
| [Apache Avro Mapred API][49] | [Apache-2.0][3] |
+| [Apache Avro][49] | [Apache-2.0][3] |
### Test Dependencies
@@ -109,6 +110,7 @@
| [AWS Java SDK :: Services :: Amazon S3][80] | [Apache License, Version 2.0][81] |
| Apache Hadoop Amazon Web Services support | [Apache License, Version 2.0][3] |
| [wildfly-openssl][82] | [Apache License 2.0][83] |
+| [Apache Avro][49] | [Apache-2.0][3] |
### Test Dependencies
@@ -116,13 +118,14 @@
| ----------------------------------------------- | --------------------------------- |
| [JUnit Jupiter (Aggregator)][84] | [Eclipse Public License v2.0][85] |
| [JUnit Jupiter API][84] | [Eclipse Public License v2.0][85] |
+| [junit-pioneer][86] | [Eclipse Public License v2.0][85] |
| [Test Database Builder for Java][58] | [MIT License][59] |
-| [Test utilities for `java.util.logging`][86] | [MIT][87] |
+| [Test utilities for `java.util.logging`][87] | [MIT][88] |
| [Matcher for SQL Result Sets][60] | [MIT License][61] |
| [Test containers for Exasol on Docker][62] | [MIT License][63] |
-| [Testcontainers :: JUnit Jupiter Extension][88] | [MIT][89] |
+| [Testcontainers :: JUnit Jupiter Extension][89] | [MIT][90] |
| [mockito-junit-jupiter][53] | [The MIT License][54] |
-| [Testcontainers :: Localstack][88] | [MIT][89] |
+| [Testcontainers :: Localstack][89] | [MIT][90] |
| [AWS Java SDK for Amazon S3][80] | [Apache License, Version 2.0][81] |
### Plugin Dependencies
@@ -150,7 +153,7 @@
| [OpenFastTrace Maven Plugin][18] | [GNU General Public License v3.0][19] |
| [Maven Clean Plugin][20] | [The Apache Software License, Version 2.0][8] |
| [Maven Resources Plugin][78] | [The Apache Software License, Version 2.0][8] |
-| [Maven JAR Plugin][90] | [The Apache Software License, Version 2.0][8] |
+| [Maven JAR Plugin][91] | [The Apache Software License, Version 2.0][8] |
| [Maven Install Plugin][21] | [The Apache Software License, Version 2.0][8] |
| [Maven Site Plugin 3][22] | [The Apache Software License, Version 2.0][8] |
@@ -240,8 +243,9 @@
[83]: http://repository.jboss.org/licenses/apache-2.0.txt
[84]: https://junit.org/junit5/
[85]: https://www.eclipse.org/legal/epl-v20.html
-[86]: https://github.com/exasol/java-util-logging-testing/
-[87]: https://opensource.org/licenses/MIT
-[88]: https://java.testcontainers.org
-[89]: http://opensource.org/licenses/MIT
-[90]: http://maven.apache.org/plugins/maven-jar-plugin/
+[86]: https://junit-pioneer.org/
+[87]: https://github.com/exasol/java-util-logging-testing/
+[88]: https://opensource.org/licenses/MIT
+[89]: https://java.testcontainers.org
+[90]: http://opensource.org/licenses/MIT
+[91]: http://maven.apache.org/plugins/maven-jar-plugin/
diff --git a/doc/changes/changelog.md b/doc/changes/changelog.md
index 8d25b8d7..3ac3b265 100644
--- a/doc/changes/changelog.md
+++ b/doc/changes/changelog.md
@@ -1,5 +1,6 @@
# Changes
+* [2.1.3](changes_2.1.3.md)
* [2.1.2](changes_2.1.2.md)
* [2.1.1](changes_2.1.1.md)
* [2.1.0](changes_2.1.0.md)
diff --git a/doc/changes/changes_2.1.3.md b/doc/changes/changes_2.1.3.md
new file mode 100644
index 00000000..a1a497a6
--- /dev/null
+++ b/doc/changes/changes_2.1.3.md
@@ -0,0 +1,29 @@
+# Spark Connector 2.1.3, released 2023-10-20
+
+Code name: More flexibility for AWS Credentials specification in spark-connector-s3
+
+## Summary
+In addition to explicitly specified AWS credentials, we now support environment variables and EC2 instance profiles.
+This release also fixes CVE-2023-39410 in Apache Avro (a transitive dependency).
+
+## Features
+
+* #192: Added support for AWS IAM profile credentials for the S3 connector.
+
+## Dependency Updates
+
+### Spark Exasol Connector With JDBC
+
+#### Compile Dependency Updates
+
+* Added `org.apache.avro:avro:1.11.3`
+
+### Spark Exasol Connector With S3
+
+#### Compile Dependency Updates
+
+* Added `org.apache.avro:avro:1.11.3`
+
+#### Test Dependency Updates
+
+* Added `org.junit-pioneer:junit-pioneer:2.1.0`
diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md
index 36c4af07..47b169e6 100644
--- a/doc/user_guide/user_guide.md
+++ b/doc/user_guide/user_guide.md
@@ -10,6 +10,7 @@ Exasol tables.
- [Versioning](#versioning)
- [Format](#format)
- [Using as Dependency](#using-as-dependency)
+- [AWS Authentication](#aws-authentication)
- [Configuration Parameters](#configuration-options)
- [Creating a Spark DataFrame From Exasol Query](#creating-a-spark-dataframe-from-exasol-query)
- [Saving Spark DataFrame to an Exasol Table](#saving-spark-dataframe-to-an-exasol-table)
@@ -31,7 +32,7 @@ Additionally, please make sure that the Exasol nodes are reachable from the Spar
### S3
-When using with S3 intermediate storage please make sure that there is access to an S3 bucket. And please prepare AWS access and secret keys with enough permissions for the S3 bucket.
+When using S3 intermediate storage, please make sure that an S3 bucket is accessible. AWS authentication is described in detail in the [corresponding section of this document](#aws-authentication).
## Versioning
@@ -145,6 +146,41 @@ For example, S3 variant with version `2.0.0-spark-3.4.1`:
spark-shell --jars spark-connector-s3_2.13-2.0.0-spark-3.4.1-assembly.jar
```
+## AWS Authentication
+
+If S3 intermediate storage is used, proper AWS authentication parameters have to be provided:
+
+* Spark has to be able to read from and write to S3 (to export and import the DataFrame's data);
+* The database has to be able to read from and write to S3 (to perform `IMPORT` and `EXPORT` statements).
+
+There are several ways to provide AWS credentials, and the concrete method depends on the configuration of your cloud infrastructure. Here we cover the main scenarios and the configuration options you can tweak.
+
+### Credential Providers
+
+The first option is `awsCredentialsProvider`, which lets you specify the list of ways credentials are retrieved from your Spark environment. This parameter is not required; if it is not specified, the default list of credential providers is used. At the time of writing, this list includes the following credential providers:
+
+* `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider`: credentials are explicitly set with the options `awsAccessKeyId` and `awsSecretAccessKey`.
+* `com.amazonaws.auth.EnvironmentVariableCredentialsProvider`: credentials are retrieved from the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` of the Spark process.
+* `com.amazonaws.auth.InstanceProfileCredentialsProvider`: credentials are retrieved from the EC2 instance's IAM role.
+
+There are many other credential providers in the Amazon Hadoop library and in third-party libraries. If you need to change the default behaviour, set the `awsCredentialsProvider` option to a comma-separated list of class names.
+
+Credential providers are described in detail in [this document](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html#Authenticating_with_S3).
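+
+A minimal Java sketch of overriding the provider list (the data source name and the `query` value are placeholders; use the connection options from the examples in this guide):
+
+```java
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+
+final SparkSession spark = SparkSession.active();
+// Rely on the EC2 instance profile only, instead of the default provider list.
+final Dataset<Row> df = spark.read()
+        .format("exasol-s3")
+        // ... plus the usual Exasol connection options shown earlier in this guide
+        .option("query", "SELECT * FROM RETAIL.SALES")
+        .option("s3Bucket", "my-intermediate-bucket")
+        .option("awsCredentialsProvider", "com.amazonaws.auth.InstanceProfileCredentialsProvider")
+        .load();
+```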
+
+### Explicitly Provided Credentials
+
+If you want to specify the Access Key ID and Secret Access Key explicitly, set the options `awsAccessKeyId` and `awsSecretAccessKey`.
+
+Alternatively, you can set environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in your Spark cluster configuration.
+
+In both cases, the credentials are used for S3 operations on Spark's side and are forwarded to the database in the `IMPORT` and `EXPORT` commands (as `USER 'key' IDENTIFIED BY 'secret_key'` parameters).
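+
+For example, a sketch with explicitly provided keys (again, the data source name and the `query` value are placeholders):
+
+```java
+final Dataset<Row> df = spark.read()
+        .format("exasol-s3")
+        // ... plus the usual Exasol connection options shown earlier in this guide
+        .option("query", "SELECT * FROM RETAIL.SALES")
+        .option("s3Bucket", "my-intermediate-bucket")
+        .option("awsAccessKeyId", "<access key id>")
+        .option("awsSecretAccessKey", "<secret access key>")
+        .load();
+```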
+
+### Using EC2 Instance Profile
+
+In AWS you can attach permissions to the IAM role associated with the EC2 instances your Spark cluster runs on. In that case, S3 credentials are obtained from the instance profile automatically by `InstanceProfileCredentialsProvider`, so you don't need to pass any options.
+
+In this scenario, no credentials are put into the `IMPORT` and `EXPORT` database commands, so you need to make sure that the database itself has proper access to the S3 bucket used for intermediate storage.
+
+If the database is running in EC2, it is also possible to use EC2 instance profiles on the database side, but this has to be enabled explicitly, as described in [this document](https://exasol.my.site.com/s/article/Changelog-content-15155?language=en_US).
+
## Configuration Options
In this section, we describe the common configuration parameters that are used for both JDBC and S3 variants to facilitate the integration between Spark and Exasol clusters.
@@ -208,8 +244,9 @@ When using the `S3` variant of the connector you should provide the following ad
| Parameter | Default | Required | Description |
|-----------------------|:------------------:|:--------:|-------------------------------------------------------------------- |
| `s3Bucket` | | ✓ | A bucket name for intermediate storage |
-| `awsAccessKeyId` | | ✓ | AWS Access Key for accessing bucket |
-| `awsSecretAccessKey` | | ✓ | AWS Secret Key for accessing bucket |
+| `awsAccessKeyId` | | | AWS Access Key for accessing bucket |
+| `awsSecretAccessKey` | | | AWS Secret Key for accessing bucket |
+| `awsCredentialsProvider` | [default providers](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html#Authenticating_with_S3) | | Comma-separated list of credential provider classes used to obtain AWS credentials from the runtime environment |
| `numPartitions` | `8` | | Number of partitions that will match number of files in `S3` bucket |
| `awsRegion` | `us-east-1` | | AWS Region for provided bucket |
| `awsEndpointOverride` | (default endpoint) | | AWS S3 Endpoint for bucket, set this value for custom endpoints |
diff --git a/exasol-jdbc/pom.xml b/exasol-jdbc/pom.xml
index ebd11d3a..b70b2f46 100644
--- a/exasol-jdbc/pom.xml
+++ b/exasol-jdbc/pom.xml
@@ -121,6 +121,11 @@
+        <dependency>
+            <groupId>org.apache.avro</groupId>
+            <artifactId>avro</artifactId>
+        </dependency>
org.scalatest
diff --git a/exasol-s3/pom.xml b/exasol-s3/pom.xml
index b9d33714..4d4d0c5c 100644
--- a/exasol-s3/pom.xml
+++ b/exasol-s3/pom.xml
@@ -53,6 +53,11 @@
wildfly-openssl
2.2.5.Final
+        <dependency>
+            <groupId>org.apache.avro</groupId>
+            <artifactId>avro</artifactId>
+        </dependency>
org.junit.jupiter
@@ -64,6 +69,11 @@
junit-jupiter-api
test
+        <dependency>
+            <groupId>org.junit-pioneer</groupId>
+            <artifactId>junit-pioneer</artifactId>
+            <scope>test</scope>
+        </dependency>
com.exasol
test-db-builder-java
diff --git a/exasol-s3/src/main/java/com/exasol/spark/s3/AbstractImportExportQueryGenerator.java b/exasol-s3/src/main/java/com/exasol/spark/s3/AbstractImportExportQueryGenerator.java
deleted file mode 100644
index ab72ae59..00000000
--- a/exasol-s3/src/main/java/com/exasol/spark/s3/AbstractImportExportQueryGenerator.java
+++ /dev/null
@@ -1,61 +0,0 @@
-package com.exasol.spark.s3;
-
-import com.exasol.spark.common.ExasolOptions;
-import com.exasol.spark.common.Option;
-
-/**
- * An common {@code CSV} query generator class.
- *
- * A generator for Exasol {@code IMPORT} or {@code EXPORT} queries that access {@code CSV} files in intermediate storage
- * systems.
- *
- * @see Exasol Import
- * @see Exasol Export
- */
-public abstract class AbstractImportExportQueryGenerator {
- private static final String DEFAULT_S3_ENDPOINT = "amazonaws.com";
-
- /** Spark options for scenarios involving an Exasol database */
- protected final ExasolOptions options;
-
- /**
- * Creates a new instance of {@link AbstractImportExportQueryGenerator}.
- *
- * @param options user provided options
- */
- public AbstractImportExportQueryGenerator(final ExasolOptions options) {
- this.options = options;
- }
-
- /**
- * Creates an {@code IDENTIFIED BY} part of a query.
- *
- * @return identifiedBy part of a query
- */
- public String getIdentifier() {
- final String awsAccessKeyId = this.options.get(Option.AWS_ACCESS_KEY_ID.key());
- final String awsSecretAccessKey = this.options.get(Option.AWS_SECRET_ACCESS_KEY.key());
- return "AT '" + escapeStringLiteral(getBucketURL()) + "'\nUSER '" + escapeStringLiteral(awsAccessKeyId)
- + "' IDENTIFIED BY '" + escapeStringLiteral(awsSecretAccessKey) + "'\n";
- }
-
- private String escapeStringLiteral(final String input) {
- return input.replace("'", "''");
- }
-
- private String getBucketURL() {
- return "https://" + this.options.getS3Bucket() + ".s3." + getS3Endpoint();
- }
-
- private String getS3Endpoint() {
- if (!this.options.containsKey(Option.S3_ENDPOINT_OVERRIDE.key())) {
- return DEFAULT_S3_ENDPOINT;
- }
- final String override = this.options.get(Option.S3_ENDPOINT_OVERRIDE.key());
- if (this.options.hasEnabled(Option.REPLACE_LOCALHOST_BY_DEFAULT_S3_ENDPOINT.key())) {
- return override.replace("localhost", DEFAULT_S3_ENDPOINT);
- }
- return override;
- }
-
-}
diff --git a/exasol-s3/src/main/java/com/exasol/spark/s3/BaseQueryGenerator.java b/exasol-s3/src/main/java/com/exasol/spark/s3/BaseQueryGenerator.java
new file mode 100644
index 00000000..5c8e90d1
--- /dev/null
+++ b/exasol-s3/src/main/java/com/exasol/spark/s3/BaseQueryGenerator.java
@@ -0,0 +1,104 @@
+package com.exasol.spark.s3;
+
+import com.exasol.spark.common.ExasolOptions;
+import com.exasol.spark.common.Option;
+
+import com.amazonaws.util.StringUtils;
+
+import java.util.AbstractMap;
+import java.util.Map;
+
+import static com.amazonaws.SDKGlobalConfiguration.ACCESS_KEY_ENV_VAR;
+import static com.amazonaws.SDKGlobalConfiguration.SECRET_KEY_ENV_VAR;
+
+/**
+ * A common {@code CSV} query generator class.
+ *
+ * A generator for Exasol {@code IMPORT} or {@code EXPORT} queries that access {@code CSV} files in intermediate storage
+ * systems.
+ *
+ * @see Exasol Import
+ * @see Exasol Export
+ */
+class BaseQueryGenerator {
+ protected static final String DEFAULT_S3_ENDPOINT = "amazonaws.com";
+
+ /** Spark options for scenarios involving an Exasol database */
+ protected final ExasolOptions options;
+
+ /**
+ * Creates a new instance of {@link BaseQueryGenerator}.
+ *
+ * @param options user provided options
+ */
+ public BaseQueryGenerator(final ExasolOptions options) {
+ this.options = options;
+ }
+
+ /**
+ * Creates an {@code IDENTIFIED BY} part of a query.
+ *
+ * @return identifiedBy part of a query
+ */
+ public String getIdentifier() {
+ Map.Entry<String, String> awsCreds = getAWSCredentials();
+
+ StringBuilder result = new StringBuilder("AT '");
+ result.append(escapeStringLiteral(getBucketURL()));
+ result.append('\'');
+
+ // no access key -> no user in the identifier, giving an option to use AWS EC2 Role Profiles
+ // https://exasol.my.site.com/s/article/Changelog-content-15155?language=en_US
+ if (!StringUtils.isNullOrEmpty(awsCreds.getKey())) {
+ result.append(" USER '");
+ result.append(escapeStringLiteral(awsCreds.getKey()));
+ result.append('\'');
+ }
+
+ if (!StringUtils.isNullOrEmpty(awsCreds.getValue())) {
+ result.append(" IDENTIFIED BY '");
+ result.append(escapeStringLiteral(awsCreds.getValue()));
+ result.append('\'');
+ }
+ result.append('\n');
+ return result.toString();
+ }
+
+ private String escapeStringLiteral(final String input) {
+ return input.replace("'", "''");
+ }
+
+ private String getBucketURL() {
+ return "https://" + this.options.getS3Bucket() + ".s3." + getS3Endpoint();
+ }
+
+ protected String getS3Endpoint() {
+ if (!this.options.containsKey(Option.S3_ENDPOINT_OVERRIDE.key())) {
+ return DEFAULT_S3_ENDPOINT;
+ }
+ final String override = this.options.get(Option.S3_ENDPOINT_OVERRIDE.key());
+ if (this.options.hasEnabled(Option.REPLACE_LOCALHOST_BY_DEFAULT_S3_ENDPOINT.key())) {
+ return override.replace("localhost", DEFAULT_S3_ENDPOINT);
+ }
+ return override;
+ }
+
+ protected Map.Entry<String, String> getAWSCredentials() {
+ String awsAccessKeyId;
+ String awsSecretAccessKey = null;
+
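+ // Explicitly provided options take precedence; otherwise fall back to the AWS environment variables.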
+ if (this.options.containsKey(Option.AWS_ACCESS_KEY_ID.key())) {
+ awsAccessKeyId = this.options.get(Option.AWS_ACCESS_KEY_ID.key());
+ if (this.options.containsKey(Option.AWS_SECRET_ACCESS_KEY.key())) {
+ awsSecretAccessKey = this.options.get(Option.AWS_SECRET_ACCESS_KEY.key());
+ }
+ } else {
+ // Retrieve access key and secret access key from environment variables
+ awsAccessKeyId = System.getenv(ACCESS_KEY_ENV_VAR);
+ awsSecretAccessKey = System.getenv(SECRET_KEY_ENV_VAR);
+ }
+ awsAccessKeyId = StringUtils.trim(awsAccessKeyId);
+ awsSecretAccessKey = StringUtils.trim(awsSecretAccessKey);
+ return new AbstractMap.SimpleImmutableEntry<>(awsAccessKeyId, awsSecretAccessKey);
+ }
+}
diff --git a/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolBatchWrite.java b/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolBatchWrite.java
index c3e69643..818ca7be 100644
--- a/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolBatchWrite.java
+++ b/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolBatchWrite.java
@@ -106,7 +106,7 @@ private static String removeCredentialsFromQuery(final String input) {
* A class that generates {@code SQL} query for importing data from intermediate {@code S3} location into Exasol
* database.
*/
- private static class S3ImportQueryGenerator extends AbstractImportExportQueryGenerator {
+ private static class S3ImportQueryGenerator extends BaseQueryGenerator {
public S3ImportQueryGenerator(final ExasolOptions options) {
super(options);
diff --git a/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolS3ScanBuilder.java b/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolS3ScanBuilder.java
index 835d8f3c..6f481bf2 100644
--- a/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolS3ScanBuilder.java
+++ b/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolS3ScanBuilder.java
@@ -154,7 +154,7 @@ private void prepareIntermediateData(final String bucketKey) {
/**
* A class that generates {@code SQL} query for exporting data from Exasol database into {@code S3} location.
*/
- private static class S3ExportQueryGenerator extends AbstractImportExportQueryGenerator {
+ private static class S3ExportQueryGenerator extends BaseQueryGenerator {
private final String bucketKey;
private final int numberOfFiles;
diff --git a/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolS3Table.java b/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolS3Table.java
index f64c6b6c..1de89028 100644
--- a/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolS3Table.java
+++ b/exasol-s3/src/main/java/com/exasol/spark/s3/ExasolS3Table.java
@@ -65,24 +65,26 @@ public Set capabilities() {
@Override
public ScanBuilder newScanBuilder(final CaseInsensitiveStringMap map) {
- final ExasolOptions options = ExasolOptions.from(map);
- validateNumberOfPartitions(options);
- updateSparkConfigurationForS3(options);
- return new ExasolS3ScanBuilder(options, this.schema, map);
+ return new ExasolS3ScanBuilder(this.buildOptions(map), this.schema, map);
}
@Override
public WriteBuilder newWriteBuilder(final LogicalWriteInfo defaultInfo) {
- final ExasolOptions options = ExasolOptions.from(defaultInfo.options());
+ final ExasolOptions options = this.buildOptions(defaultInfo.options());
validateHasTable(options);
- validateNumberOfPartitions(options);
- updateSparkConfigurationForS3(options);
final SparkSession sparkSession = SparkSession.active();
final String applicationId = sparkSession.sparkContext().applicationId();
final S3BucketKeyPathProvider prov = new UUIDS3BucketKeyPathProvider(applicationId);
return new ExasolWriteBuilderProvider(options, prov).createWriteBuilder(this.schema, defaultInfo);
}
+ protected ExasolOptions buildOptions(final CaseInsensitiveStringMap map) {
+ final ExasolOptions result = ExasolOptions.from(map);
+ validateNumberOfPartitions(result);
+ updateSparkConfigurationForS3(result);
+ return result;
+ }
+
private void validateNumberOfPartitions(final ExasolOptions options) {
final int numberOfPartitions = options.getNumberOfPartitions();
final int maxAllowedPartitions = Integer.parseInt(Option.MAX_ALLOWED_NUMBER_OF_PARTITIONS.key());
@@ -109,12 +111,12 @@ private void updateSparkConfigurationForS3(final ExasolOptions options) {
final SparkSession sparkSession = SparkSession.active();
synchronized (sparkSession.sparkContext().hadoopConfiguration()) {
final Configuration conf = sparkSession.sparkContext().hadoopConfiguration();
- conf.set("fs.s3a.access.key", options.get(Option.AWS_ACCESS_KEY_ID.key()));
- conf.set("fs.s3a.secret.key", options.get(Option.AWS_SECRET_ACCESS_KEY.key()));
+ if (options.containsKey(Option.AWS_ACCESS_KEY_ID.key())) {
+ conf.set("fs.s3a.access.key", options.get(Option.AWS_ACCESS_KEY_ID.key()));
+ conf.set("fs.s3a.secret.key", options.get(Option.AWS_SECRET_ACCESS_KEY.key()));
+ }
if (options.containsKey(Option.AWS_CREDENTIALS_PROVIDER.key())) {
conf.set("fs.s3a.aws.credentials.provider", options.get(Option.AWS_CREDENTIALS_PROVIDER.key()));
- } else {
- conf.set("fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider");
}
if (options.containsKey(Option.S3_ENDPOINT_OVERRIDE.key())) {
conf.set("fs.s3a.endpoint", "http://" + options.get(Option.S3_ENDPOINT_OVERRIDE.key()));
diff --git a/exasol-s3/src/test/java/com/exasol/spark/s3/BaseQueryGeneratorTest.java b/exasol-s3/src/test/java/com/exasol/spark/s3/BaseQueryGeneratorTest.java
new file mode 100644
index 00000000..45d44e24
--- /dev/null
+++ b/exasol-s3/src/test/java/com/exasol/spark/s3/BaseQueryGeneratorTest.java
@@ -0,0 +1,127 @@
+package com.exasol.spark.s3;
+
+import com.exasol.spark.common.ExasolOptions;
+import com.exasol.spark.common.Option;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+import org.junit.jupiter.api.Test;
+import org.junitpioneer.jupiter.ClearEnvironmentVariable;
+import org.junitpioneer.jupiter.SetEnvironmentVariable;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.nullValue;
+
+import static com.amazonaws.SDKGlobalConfiguration.ACCESS_KEY_ENV_VAR;
+import static com.amazonaws.SDKGlobalConfiguration.SECRET_KEY_ENV_VAR;
+
+
+class BaseQueryGeneratorTest {
+ private final Map<String, String> basic_params = Map.of(
+ Option.TABLE.key(), "some_table",
+ Option.S3_BUCKET.key(), "bucket");
+
+ @Test
+ @ClearEnvironmentVariable(key=ACCESS_KEY_ENV_VAR)
+ @ClearEnvironmentVariable(key=SECRET_KEY_ENV_VAR)
+ void testCredentialsEmptyOptions() {
+ final ExasolOptions options = ExasolOptions.from(new CaseInsensitiveStringMap(basic_params));
+ final BaseQueryGenerator generator = new BaseQueryGenerator(options);
+ final Map.Entry<String, String> awsCreds = generator.getAWSCredentials();
+
+ assertThat(awsCreds.getKey(), nullValue());
+ assertThat(awsCreds.getValue(), nullValue());
+ }
+
+ @Test
+ @SetEnvironmentVariable(key=ACCESS_KEY_ENV_VAR, value="env_key")
+ @SetEnvironmentVariable(key=SECRET_KEY_ENV_VAR, value="env_sec")
+ void testCredentialsExplicitOptions() {
+ final Map<String, String> params = new HashMap<>(basic_params);
+ params.put(Option.AWS_ACCESS_KEY_ID.key(), "access_key");
+ params.put(Option.AWS_SECRET_ACCESS_KEY.key(), "secret_key");
+
+ final ExasolOptions options = ExasolOptions.from(new CaseInsensitiveStringMap(params));
+ final BaseQueryGenerator generator = new BaseQueryGenerator(options);
+ final Map.Entry<String, String> awsCreds = generator.getAWSCredentials();
+
+ assertThat(awsCreds.getKey(), equalTo("access_key"));
+ assertThat(awsCreds.getValue(), equalTo("secret_key"));
+ }
+
+ @Test
+ @SetEnvironmentVariable(key=ACCESS_KEY_ENV_VAR, value="env_key")
+ @SetEnvironmentVariable(key=SECRET_KEY_ENV_VAR, value="env_sec")
+ void testCredentialsFromEnvironment() {
+ final ExasolOptions options = ExasolOptions.from(new CaseInsensitiveStringMap(basic_params));
+ final BaseQueryGenerator generator = new BaseQueryGenerator(options);
+ final Map.Entry<String, String> awsCreds = generator.getAWSCredentials();
+
+ assertThat(awsCreds.getKey(), equalTo("env_key"));
+ assertThat(awsCreds.getValue(), equalTo("env_sec"));
+ }
+
+ @Test
+ @ClearEnvironmentVariable(key=ACCESS_KEY_ENV_VAR)
+ @ClearEnvironmentVariable(key=SECRET_KEY_ENV_VAR)
+ void testIdentifierNoCredentials() {
+ final ExasolOptions options = ExasolOptions.from(new CaseInsensitiveStringMap(basic_params));
+ final BaseQueryGenerator generator = new BaseQueryGenerator(options);
+ assertThat(generator.getIdentifier(), equalTo("AT 'https://bucket.s3.amazonaws.com'\n"));
+ }
+
+ @Test
+ @ClearEnvironmentVariable(key=ACCESS_KEY_ENV_VAR)
+ @ClearEnvironmentVariable(key=SECRET_KEY_ENV_VAR)
+ void testIdentifierOnlyUserProvided() {
+ final Map<String, String> params = new HashMap<>(basic_params);
+ params.put(Option.AWS_ACCESS_KEY_ID.key(), "access_key");
+
+ final ExasolOptions options = ExasolOptions.from(new CaseInsensitiveStringMap(params));
+ final BaseQueryGenerator generator = new BaseQueryGenerator(options);
+ assertThat(generator.getIdentifier(), equalTo("AT 'https://bucket.s3.amazonaws.com' USER 'access_key'\n"));
+ }
+
+ @Test
+ @ClearEnvironmentVariable(key=ACCESS_KEY_ENV_VAR)
+ @ClearEnvironmentVariable(key=SECRET_KEY_ENV_VAR)
+ void testIdentifierBothUserAndKey() {
+ final Map<String, String> params = new HashMap<>(basic_params);
+ params.put(Option.AWS_ACCESS_KEY_ID.key(), "access_key");
+ params.put(Option.AWS_SECRET_ACCESS_KEY.key(), "secret_key");
+
+ final ExasolOptions options = ExasolOptions.from(new CaseInsensitiveStringMap(params));
+ final BaseQueryGenerator generator = new BaseQueryGenerator(options);
+ assertThat(generator.getIdentifier(), equalTo("AT 'https://bucket.s3.amazonaws.com' USER 'access_key'" +
+ " IDENTIFIED BY 'secret_key'\n"));
+ }
+
+ @Test
+ @SetEnvironmentVariable(key=ACCESS_KEY_ENV_VAR, value="key")
+ @SetEnvironmentVariable(key=SECRET_KEY_ENV_VAR, value="secret")
+ void testIdentifierViaEnv() {
+ final ExasolOptions options = ExasolOptions.from(new CaseInsensitiveStringMap(basic_params));
+ final BaseQueryGenerator generator = new BaseQueryGenerator(options);
+ assertThat(generator.getIdentifier(), equalTo("AT 'https://bucket.s3.amazonaws.com' USER 'key'" +
+ " IDENTIFIED BY 'secret'\n"));
+ }
+
+ @Test
+ void testS3EndpointDefault() {
+ final ExasolOptions options = ExasolOptions.from(new CaseInsensitiveStringMap(basic_params));
+ final BaseQueryGenerator generator = new BaseQueryGenerator(options);
+ assertThat(generator.getS3Endpoint(), equalTo(BaseQueryGenerator.DEFAULT_S3_ENDPOINT));
+ }
+
+ @Test
+ void testS3EndpointWithOverride() {
+ final Map<String, String> params = new HashMap<>(basic_params);
+ params.put(Option.S3_ENDPOINT_OVERRIDE.key(), "my_endpoint");
+
+ final ExasolOptions options = ExasolOptions.from(new CaseInsensitiveStringMap(params));
+ final BaseQueryGenerator generator = new BaseQueryGenerator(options);
+ assertThat(generator.getS3Endpoint(), equalTo("my_endpoint"));
+ }
+}
diff --git a/exasol-s3/src/test/java/com/exasol/spark/s3/ExasolWriteBuilderProviderIT.java b/exasol-s3/src/test/java/com/exasol/spark/s3/ExasolWriteBuilderProviderIT.java
index 682ee4cc..76dca030 100644
--- a/exasol-s3/src/test/java/com/exasol/spark/s3/ExasolWriteBuilderProviderIT.java
+++ b/exasol-s3/src/test/java/com/exasol/spark/s3/ExasolWriteBuilderProviderIT.java
@@ -22,10 +22,12 @@
import com.exasol.spark.common.ExasolValidationException;
import io.netty.util.internal.ThreadLocalRandom;
+import org.testcontainers.junit.jupiter.Testcontainers;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
@ExtendWith(MockitoExtension.class)
+@Testcontainers
class ExasolWriteBuilderProviderIT extends S3IntegrationTestSetup {
static final String s3BucketKey = "testS3BucketKey";
final String applicationId = "spark-test-app-id";
diff --git a/exasol-s3/src/test/java/com/exasol/spark/s3/S3TableConfTest.java b/exasol-s3/src/test/java/com/exasol/spark/s3/S3TableConfTest.java
new file mode 100644
index 00000000..59f1cf49
--- /dev/null
+++ b/exasol-s3/src/test/java/com/exasol/spark/s3/S3TableConfTest.java
@@ -0,0 +1,90 @@
+package com.exasol.spark.s3;
+
+import com.exasol.spark.SparkSessionProvider;
+import com.exasol.spark.common.Option;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.spark.SparkConf;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.*;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+import org.junit.jupiter.api.*;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.nullValue;
+import static org.hamcrest.Matchers.containsString;
+
+import java.util.HashMap;
+import java.util.Map;
+
+class S3TableConfTest {
+ private final StructType schema = new StructType(
+ new StructField[] {
+ new StructField("f_str", DataTypes.StringType, true, Metadata.empty()),
+ new StructField("f_int", DataTypes.IntegerType, true, Metadata.empty()),
+ }
+ );
+ private final Map<String, String> basic_params = Map.of(Option.TABLE.key(), "some_table");
+ private static SparkSession spark;
+
+ @BeforeEach
+ public void beforeEach() {
+ final SparkConf conf = new SparkConf()
+ .setMaster("local[*]")
+ .set("spark.ui.enabled", "false")
+ .set("spark.driver.host", "localhost");
+ spark = SparkSessionProvider.getSparkSession(conf);
+ }
+
+ @AfterEach
+ public void afterEach() {
+ spark.close();
+ }
+
+ @Test
+ void testOptionsWithoutParams() {
+ final ExasolS3Table s3Table = new ExasolS3Table(schema);
+ s3Table.buildOptions(new CaseInsensitiveStringMap(basic_params));
+
+ final Configuration conf = spark.sparkContext().hadoopConfiguration();
+ assertThat(conf.get("fs.s3a.access.key"), nullValue());
+ assertThat(conf.get("fs.s3a.secret.key"), nullValue());
+ }
+
+ @Test
+ void testOptionsWithKeys() {
+ final ExasolS3Table s3Table = new ExasolS3Table(schema);
+ final Map<String, String> params = new HashMap<>(basic_params);
+ params.put(Option.AWS_ACCESS_KEY_ID.key(), "some-key");
+ params.put(Option.AWS_SECRET_ACCESS_KEY.key(), "secret-key");
+ s3Table.buildOptions(new CaseInsensitiveStringMap(params));
+
+ final Configuration conf = spark.sparkContext().hadoopConfiguration();
+ assertThat(conf.get("fs.s3a.access.key"), equalTo("some-key"));
+ assertThat(conf.get("fs.s3a.secret.key"), equalTo("secret-key"));
+ }
+
+ @Test
+ void testNoDefaultCredentialsProvider() {
+ final ExasolS3Table s3Table = new ExasolS3Table(schema);
+ s3Table.buildOptions(new CaseInsensitiveStringMap(basic_params));
+
+ final Configuration conf = spark.sparkContext().hadoopConfiguration();
+ final String val = conf.get("fs.s3a.aws.credentials.provider");
+ assertThat(val, containsString("org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"));
+ assertThat(val, containsString("com.amazonaws.auth.EnvironmentVariableCredentialsProvider"));
+ assertThat(val, containsString("org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider"));
+ }
+
+ @Test
+ void testExplicitCredentialsProvider() {
+ final ExasolS3Table s3Table = new ExasolS3Table(schema);
+ final Map<String, String> params = new HashMap<>(basic_params);
+ final String providerClass = "my-fancy-credentials-provider";
+ params.put(Option.AWS_CREDENTIALS_PROVIDER.key(), providerClass);
+ s3Table.buildOptions(new CaseInsensitiveStringMap(params));
+
+ final Configuration conf = spark.sparkContext().hadoopConfiguration();
+ assertThat(conf.get("fs.s3a.aws.credentials.provider"), equalTo(providerClass));
+ }
+}
diff --git a/parent-pom/pom.xml b/parent-pom/pom.xml
index 28766341..fd67708d 100644
--- a/parent-pom/pom.xml
+++ b/parent-pom/pom.xml
@@ -15,7 +15,7 @@
pk_generated_parent.pom
- 2.1.2
+ 2.1.3
8
2.20.0
5.10.0
@@ -315,6 +315,12 @@
commons-compress
1.24.0
+            <dependency>
+                <groupId>org.apache.avro</groupId>
+                <artifactId>avro</artifactId>
+                <version>1.11.3</version>
+            </dependency>
org.junit.jupiter
@@ -328,6 +334,12 @@
${junit.version}
test
+            <dependency>
+                <groupId>org.junit-pioneer</groupId>
+                <artifactId>junit-pioneer</artifactId>
+                <version>2.1.0</version>
+                <scope>test</scope>
+            </dependency>
org.mockito
mockito-junit-jupiter