NIFI-12078: Add JDBC Catalog Controller Service for Iceberg processors
Disable jdbc catalog tests on Windows OS

Fix review comments

Signed-off-by: Matt Burgess <[email protected]>

This closes #9145
mark-bathori authored and mattyb149 committed Aug 23, 2024
1 parent 6013b93 commit f31f803
Showing 24 changed files with 687 additions and 379 deletions.
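
The new controller service hands NiFi's pooled JDBC connections to Iceberg's JdbcCatalog, which keeps catalog metadata (namespaces and table pointers) in relational tables while data and metadata files stay under the warehouse location. For orientation, a minimal sketch of plain JdbcCatalog initialization that this commit builds on (the JDBC URI, warehouse path, and catalog name are illustrative, not taken from this commit):

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.jdbc.JdbcCatalog;

import java.util.HashMap;
import java.util.Map;

public class JdbcCatalogSketch {
    public static void main(String[] args) {
        final Map<String, String> properties = new HashMap<>();
        // Catalog metadata lives in the JDBC database; table files live under the warehouse.
        properties.put(CatalogProperties.URI, "jdbc:postgresql://localhost:5432/iceberg"); // illustrative
        properties.put(CatalogProperties.WAREHOUSE_LOCATION, "/tmp/iceberg-warehouse");    // illustrative

        final JdbcCatalog catalog = new JdbcCatalog();
        catalog.setConf(new Configuration());
        catalog.initialize("demo_catalog", properties); // illustrative catalog name

        catalog.loadTable(TableIdentifier.of("db", "users")); // throws if the table does not exist
    }
}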
@@ -25,9 +25,8 @@
<packaging>jar</packaging>

<dependencies>
- <!-- Internal dependencies -->


+ <!-- Internal dependencies -->
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-record</artifactId>
@@ -59,6 +58,10 @@
<artifactId>nifi-hadoop-utils</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-dbcp-service-api</artifactId>
+ </dependency>

<!-- External dependencies -->
<dependency>
@@ -86,6 +89,21 @@
<artifactId>iceberg-orc</artifactId>
<version>${iceberg.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.iceberg</groupId>
+ <artifactId>iceberg-aws</artifactId>
+ <version>${iceberg.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.iceberg</groupId>
+ <artifactId>iceberg-azure</artifactId>
+ <version>${iceberg.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.iceberg</groupId>
+ <artifactId>iceberg-gcp</artifactId>
+ <version>${iceberg.version}</version>
+ </dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
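The new iceberg-aws, iceberg-azure, and iceberg-gcp dependencies pull in the cloud FileIO implementations that the JDBC catalog loads by class name (see the FILE_IO_IMPLEMENTATION property below). A hedged sketch of that loading step, assuming Iceberg's standard implementation class names:

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.io.FileIO;

import java.util.Map;

public class FileIoLoadingSketch {
    static FileIO load() {
        // Standard Iceberg implementation class names (assumptions, not taken from this diff):
        // org.apache.iceberg.aws.s3.S3FileIO, org.apache.iceberg.azure.adlsv2.ADLSFileIO,
        // org.apache.iceberg.gcp.gcs.GCSFileIO
        return CatalogUtil.loadFileIO("org.apache.iceberg.aws.s3.S3FileIO", Map.of(), new Configuration());
    }
}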
@@ -19,16 +19,25 @@

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogProperties;
+ import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.hive.HiveCatalog;
+ import org.apache.iceberg.io.FileIO;
+ import org.apache.iceberg.jdbc.JdbcCatalog;
+ import org.apache.iceberg.jdbc.JdbcClientPool;
+ import org.apache.nifi.dbcp.DBCPService;
import org.apache.nifi.services.iceberg.IcebergCatalogProperty;
import org.apache.nifi.services.iceberg.IcebergCatalogService;

import java.util.HashMap;
import java.util.Map;
+ import java.util.function.Function;

import static org.apache.nifi.processors.iceberg.IcebergUtils.getConfigurationFromFiles;
+ import static org.apache.nifi.services.iceberg.IcebergCatalogProperty.CATALOG_NAME;
+ import static org.apache.nifi.services.iceberg.IcebergCatalogProperty.FILE_IO_IMPLEMENTATION;
+ import static org.apache.nifi.services.iceberg.IcebergCatalogProperty.CLIENT_POOL_SERVICE;
import static org.apache.nifi.services.iceberg.IcebergCatalogProperty.METASTORE_URI;
import static org.apache.nifi.services.iceberg.IcebergCatalogProperty.WAREHOUSE_LOCATION;

@@ -44,6 +53,7 @@ public Catalog create() {
return switch (catalogService.getCatalogType()) {
case HIVE -> initHiveCatalog(catalogService);
case HADOOP -> initHadoopCatalog(catalogService);
+ case JDBC -> initJdbcCatalog(catalogService);
};
}

@@ -55,29 +65,47 @@ private Catalog initHiveCatalog(IcebergCatalogService catalogService) {
catalog.setConf(configuration);
}

- final Map<IcebergCatalogProperty, String> catalogProperties = catalogService.getCatalogProperties();
- final Map <String, String> properties = new HashMap<>();
+ final Map<IcebergCatalogProperty, Object> catalogProperties = catalogService.getCatalogProperties();
+ final Map<String, String> properties = new HashMap<>();

if (catalogProperties.containsKey(METASTORE_URI)) {
- properties.put(CatalogProperties.URI, catalogProperties.get(METASTORE_URI));
+ properties.put(CatalogProperties.URI, (String) catalogProperties.get(METASTORE_URI));
}

if (catalogProperties.containsKey(WAREHOUSE_LOCATION)) {
- properties.put(CatalogProperties.WAREHOUSE_LOCATION, catalogProperties.get(WAREHOUSE_LOCATION));
+ properties.put(CatalogProperties.WAREHOUSE_LOCATION, (String) catalogProperties.get(WAREHOUSE_LOCATION));
}

catalog.initialize("hive-catalog", properties);
return catalog;
}

private Catalog initHadoopCatalog(IcebergCatalogService catalogService) {
- final Map<IcebergCatalogProperty, String> catalogProperties = catalogService.getCatalogProperties();
- final String warehousePath = catalogProperties.get(WAREHOUSE_LOCATION);
+ final Map<IcebergCatalogProperty, Object> catalogProperties = catalogService.getCatalogProperties();
+ final String warehousePath = (String) catalogProperties.get(WAREHOUSE_LOCATION);

if (catalogService.getConfigFilePaths() != null) {
return new HadoopCatalog(getConfigurationFromFiles(catalogService.getConfigFilePaths()), warehousePath);
} else {
return new HadoopCatalog(new Configuration(), warehousePath);
}
}

+ private Catalog initJdbcCatalog(IcebergCatalogService catalogService) {
+ final Map<IcebergCatalogProperty, Object> catalogProperties = catalogService.getCatalogProperties();
+ final Map<String, String> properties = new HashMap<>();
+ properties.put(CatalogProperties.URI, "");
+ properties.put(CatalogProperties.WAREHOUSE_LOCATION, (String) catalogProperties.get(WAREHOUSE_LOCATION));
+
+ final Configuration configuration = getConfigurationFromFiles(catalogService.getConfigFilePaths());
+ final DBCPService dbcpService = (DBCPService) catalogProperties.get(CLIENT_POOL_SERVICE);
+
+ final Function<Map<String, String>, JdbcClientPool> clientPoolBuilder = props -> new IcebergJdbcClientPool(props, dbcpService);
+
+ final Function<Map<String, String>, FileIO> ioBuilder = props -> CatalogUtil.loadFileIO((String) catalogProperties.get(FILE_IO_IMPLEMENTATION), props, configuration);
+
+ JdbcCatalog catalog = new JdbcCatalog(ioBuilder, clientPoolBuilder, false);
+ catalog.setConf(configuration);
+ catalog.initialize((String) catalogProperties.get(CATALOG_NAME), properties);
+ return catalog;
+ }
}
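
Two details worth noting in initJdbcCatalog: CatalogProperties.URI is deliberately set to an empty string because the connection URL belongs to the NiFi DBCPService (the client pool below never opens its own JDBC connections), and the FileIO implementation is resolved by class name from the FILE_IO_IMPLEMENTATION property. Downstream, processors only need the factory; a minimal usage sketch, where catalogService stands in for an enabled IcebergCatalogService:

import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.nifi.services.iceberg.IcebergCatalogService;

public class CatalogFactoryUsageSketch {
    static Table loadUsersTable(IcebergCatalogService catalogService) {
        final Catalog catalog = new IcebergCatalogFactory(catalogService).create(); // HIVE, HADOOP, or JDBC
        return catalog.loadTable(TableIdentifier.of("db", "users")); // illustrative identifier
    }
}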
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.iceberg.catalog;

import org.apache.iceberg.jdbc.JdbcClientPool;
import org.apache.nifi.dbcp.DBCPService;

import java.sql.Connection;
import java.util.Map;

public class IcebergJdbcClientPool extends JdbcClientPool {

private final DBCPService dbcpService;

public IcebergJdbcClientPool(Map<String, String> properties, DBCPService dbcpService) {
super("", properties);
this.dbcpService = dbcpService;
}

@Override
protected Connection newClient() {
return dbcpService.getConnection();
}

}
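
Overriding newClient() means every connection the catalog borrows comes from NiFi's pooled DBCPService rather than DriverManager. A sketch of how Iceberg code exercises such a pool (assuming Iceberg's ClientPool.run contract, which declares SQLException and InterruptedException):

import org.apache.iceberg.jdbc.JdbcClientPool;
import org.apache.nifi.dbcp.DBCPService;

import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Map;

public class ClientPoolUsageSketch {
    static Integer selectOne(DBCPService dbcpService) throws Exception {
        final JdbcClientPool pool = new IcebergJdbcClientPool(Map.of(), dbcpService);
        // run() borrows a Connection, executes the action, and returns the Connection to the pool.
        return pool.run(connection -> {
            try (Statement statement = connection.createStatement();
                 ResultSet resultSet = statement.executeQuery("SELECT 1")) { // illustrative query
                resultSet.next();
                return resultSet.getInt(1);
            }
        });
    }
}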
@@ -318,6 +318,18 @@
<artifactId>nifi-kerberos-user-service-api</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-iceberg-services</artifactId>
+ <version>2.0.0-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-dbcp-service</artifactId>
+ <version>2.0.0-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>

</project>
@@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.iceberg;

import org.apache.avro.Schema;
import org.apache.commons.io.IOUtils;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.types.Types;
import org.apache.nifi.avro.AvroTypeUtil;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.serialization.record.MockRecordParser;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.util.TestRunner;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

import static org.apache.nifi.processors.iceberg.util.IcebergTestUtils.NAMESPACE;
import static org.apache.nifi.processors.iceberg.util.IcebergTestUtils.RECORD_READER_SERVICE;
import static org.apache.nifi.processors.iceberg.util.IcebergTestUtils.createTemporaryDirectory;

public class AbstractTestPutIceberg {

protected static final String TABLE_NAME = "users";

protected static final TableIdentifier TABLE_IDENTIFIER = TableIdentifier.of(NAMESPACE, TABLE_NAME);

protected static final org.apache.iceberg.Schema USER_SCHEMA = new org.apache.iceberg.Schema(
Types.NestedField.required(1, "id", Types.IntegerType.get()),
Types.NestedField.required(2, "name", Types.StringType.get()),
Types.NestedField.required(3, "department", Types.StringType.get())
);

protected TestRunner runner;
protected PutIceberg processor;
protected Catalog catalog;
protected String warehousePath;
protected static Schema inputSchema;

protected void initRecordReader() throws InitializationException {
final MockRecordParser readerFactory = new MockRecordParser();
final RecordSchema recordSchema = AvroTypeUtil.createSchema(inputSchema);

for (RecordField recordField : recordSchema.getFields()) {
readerFactory.addSchemaField(recordField);
}

readerFactory.addRecord(0, "John", "Finance");
readerFactory.addRecord(1, "Jill", "Finance");
readerFactory.addRecord(2, "James", "Marketing");
readerFactory.addRecord(3, "Joana", "Sales");

runner.addControllerService(RECORD_READER_SERVICE, readerFactory);
runner.enableControllerService(readerFactory);

runner.setProperty(PutIceberg.RECORD_READER, RECORD_READER_SERVICE);
}

@BeforeAll
public static void initSchema() throws IOException {
final String avroSchema = IOUtils.toString(Files.newInputStream(Paths.get("src/test/resources/user.avsc")), StandardCharsets.UTF_8);
inputSchema = new Schema.Parser().parse(avroSchema);
}

@BeforeEach
public void setUp() {
warehousePath = createTemporaryDirectory().getAbsolutePath();
processor = new PutIceberg();
}

@AfterEach
public void tearDown() {
catalog.dropTable(TABLE_IDENTIFIER);
}
}
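
Concrete tests extend this base class, create a catalog and table, and run the processor. A hypothetical subclass sketch (the class name, property wiring, and PutIceberg property descriptors referenced here are assumptions; the real subclasses in this commit configure PutIceberg through an IcebergCatalogService controller service):

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.Test;

public class TestPutIcebergSketch extends AbstractTestPutIceberg {

    @Test
    public void appendsRecordsToTable() throws Exception {
        catalog = new HadoopCatalog(new Configuration(), warehousePath);
        catalog.createTable(TABLE_IDENTIFIER, USER_SCHEMA, PartitionSpec.unpartitioned());

        runner = TestRunners.newTestRunner(processor);
        initRecordReader();
        // Catalog service wiring omitted for brevity; PutIceberg.TABLE_NAME and
        // PutIceberg.REL_SUCCESS are assumed names here.
        runner.setProperty(PutIceberg.TABLE_NAME, TABLE_NAME);
        runner.enqueue(new byte[0]);
        runner.run();

        runner.assertTransferCount(PutIceberg.REL_SUCCESS, 1);
    }
}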
@@ -17,25 +17,24 @@
*/
package org.apache.nifi.processors.iceberg;

+ import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
- import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.exceptions.CommitFailedException;
+ import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.io.TaskWriter;
import org.apache.iceberg.io.WriteResult;
import org.apache.iceberg.types.Types;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
- import org.apache.nifi.processors.iceberg.catalog.IcebergCatalogFactory;
- import org.apache.nifi.processors.iceberg.catalog.TestHadoopCatalogService;
import org.apache.nifi.processors.iceberg.converter.IcebergRecordConverter;
import org.apache.nifi.processors.iceberg.writer.IcebergTaskWriterFactory;
import org.apache.nifi.serialization.SimpleRecordSchema;
@@ -52,6 +51,7 @@
import org.junit.jupiter.api.condition.DisabledOnOs;
import org.mockito.Mockito;

+ import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
@@ -64,6 +64,7 @@
import static org.apache.nifi.processors.iceberg.PutIceberg.MAXIMUM_COMMIT_WAIT_TIME;
import static org.apache.nifi.processors.iceberg.PutIceberg.MINIMUM_COMMIT_WAIT_TIME;
import static org.apache.nifi.processors.iceberg.PutIceberg.NUMBER_OF_COMMIT_RETRIES;
+ import static org.apache.nifi.processors.iceberg.util.IcebergTestUtils.createTemporaryDirectory;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.condition.OS.WINDOWS;
import static org.mockito.Mockito.doThrow;
@@ -196,10 +197,9 @@ public void testMaxCommitDurationExceeded() {
verify(appender, times(2)).commit();
}

- private Table initCatalog() throws IOException {
- TestHadoopCatalogService catalogService = new TestHadoopCatalogService();
- IcebergCatalogFactory catalogFactory = new IcebergCatalogFactory(catalogService);
- Catalog catalog = catalogFactory.create();
+ private Table initCatalog() {
+ final File warehousePath = createTemporaryDirectory();
+ final HadoopCatalog catalog = new HadoopCatalog(new Configuration(), warehousePath.getAbsolutePath());

return catalog.createTable(TABLE_IDENTIFIER, ABORT_SCHEMA, PartitionSpec.unpartitioned());
}
(Diff truncated; the remaining changed files are not shown.)
