diff --git a/extended/build.gradle b/extended/build.gradle index 87504eb534..f0c444d8fc 100644 --- a/extended/build.gradle +++ b/extended/build.gradle @@ -26,7 +26,7 @@ jar { } compileKotlin { - kotlinOptions.jvmTarget = "17" + kotlinOptions.jvmTarget = JavaVersion.VERSION_17 } generateGrammarSource { @@ -62,6 +62,10 @@ dependencies { exclude group: 'org.abego.treelayout' } + def kotlinVersion = "1.6.0" + def kafkaVersion = "2.4.0" + def jacksonVersion = "2.17.2" + def withoutServers = { exclude group: 'org.eclipse.jetty' exclude group: 'org.eclipse.jetty.aggregate' @@ -105,7 +109,10 @@ dependencies { } compileOnly group: 'com.couchbase.client', name: 'java-client', version: '3.3.0', withoutJacksons compileOnly group: 'io.lettuce', name: 'lettuce-core', version: '6.1.1.RELEASE' - compileOnly group: 'com.fasterxml.jackson.module', name: 'jackson-module-kotlin', version: '2.14.0', withoutJacksons + compileOnly group: 'com.fasterxml.jackson.module', name: 'jackson-module-kotlin', version: jacksonVersion + testImplementation group: 'com.fasterxml.jackson.module', name: 'jackson-module-kotlin', version: jacksonVersion + compileOnly group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: jacksonVersion + testImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: jacksonVersion compileOnly group: 'com.amazonaws', name: 'aws-java-sdk-s3', version: '1.11.270' compileOnly group: 'com.amazonaws', name: 'aws-java-sdk-comprehend', version: '1.12.353' , withoutJacksons compileOnly group: 'com.sun.mail', name: 'javax.mail', version: '1.6.0' @@ -118,6 +125,12 @@ dependencies { compileOnly group: 'org.apache.arrow', name: 'arrow-vector', version: '13.0.0' compileOnly group: 'org.apache.arrow', name: 'arrow-memory-netty', version: '13.0.0' + compileOnly group: 'org.jetbrains.kotlinx', name: 'kotlinx-coroutines-core', version: '1.4.2' + compileOnly group: 'org.apache.kafka', name: 'kafka-clients', version: kafkaVersion + compileOnly group: 
'com.github.conker84', name: 'neo4j-configuration-lifecycle', version: 'ad59084711' + compileOnly group: 'io.confluent', name: 'kafka-avro-serializer', version: '5.2.2' + + testImplementation group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-yaml', version: '2.16.1' testImplementation group: 'org.apache.arrow', name: 'arrow-vector', version: '13.0.0' testImplementation group: 'org.apache.arrow', name: 'arrow-memory-netty', version: '13.0.0' @@ -145,6 +158,16 @@ dependencies { testImplementation group: 'org.mockito', name: 'mockito-core', version: '5.4.0' testImplementation group: 'org.apache.parquet', name: 'parquet-hadoop', version: '1.13.1', withoutServers testImplementation group: 'com.opencsv', name: 'opencsv', version: '5.7.1' + testImplementation group: 'org.jetbrains.kotlinx', name: 'kotlinx-coroutines-core', version: '1.4.2' +// testImplementation group: 'org.jetbrains.kotlin', name: 'kotlin-test-junit', version: kotlinVersion +// testImplementation group: 'org.jetbrains.kotlin', name: 'kotlin-test-junit5', version: kotlinVersion + + testImplementation group: 'org.jetbrains.kotlin', name: 'kotlin-test', version: kotlinVersion + + testImplementation group: 'org.apache.kafka', name: 'kafka-clients', version: kafkaVersion + testImplementation group: 'io.confluent', name: 'kafka-avro-serializer', version: '5.2.2' + testImplementation group: 'org.testcontainers', name: 'kafka', version: testContainersVersion + testImplementation group: 'com.github.conker84', name: 'neo4j-configuration-lifecycle', version: 'ad59084711' configurations.all { exclude group: 'org.slf4j', module: 'slf4j-nop' diff --git a/extended/src/main/java/apoc/ExtendedApocConfig.java b/extended/src/main/java/apoc/ExtendedApocConfig.java index f5249cda37..e89cc12241 100644 --- a/extended/src/main/java/apoc/ExtendedApocConfig.java +++ b/extended/src/main/java/apoc/ExtendedApocConfig.java @@ -44,6 +44,7 @@ public class ExtendedApocConfig extends LifecycleAdapter public static final
String APOC_ML_WATSON_URL = "apoc.ml.watson.url"; public static final String APOC_AWS_KEY_ID = "apoc.aws.key.id"; public static final String APOC_AWS_SECRET_KEY = "apoc.aws.secret.key"; + public static final String APOC_KAFKA_ENABLED = "apoc.kafka.enabled"; public enum UuidFormatType { hex, base64 } // These were earlier added via the Neo4j config using the ApocSettings.java class @@ -73,6 +74,25 @@ public enum UuidFormatType { hex, base64 } public static final String CONFIG_DIR = "config-dir="; + private static final String CONF_DIR_ARG = "config-dir="; + private static final String SOURCE_ENABLED = "apoc.kafka.source.enabled"; + private static final boolean SOURCE_ENABLED_VALUE = true; + private static final String PROCEDURES_ENABLED = "apoc.kafka.procedures.enabled"; + private static final boolean PROCEDURES_ENABLED_VALUE = true; + private static final String SINK_ENABLED = "apoc.kafka.sink.enabled"; + private static final boolean SINK_ENABLED_VALUE = false; + private static final String CHECK_APOC_TIMEOUT = "apoc.kafka.check.apoc.timeout"; + private static final String CHECK_APOC_INTERVAL = "apoc.kafka.check.apoc.interval"; + private static final String CLUSTER_ONLY = "apoc.kafka.cluster.only"; + private static final String CHECK_WRITEABLE_INSTANCE_INTERVAL = "apoc.kafka.check.writeable.instance.interval"; + private static final String SYSTEM_DB_WAIT_TIMEOUT = "apoc.kafka.systemdb.wait.timeout"; + private static final long SYSTEM_DB_WAIT_TIMEOUT_VALUE = 10000L; + private static final String POLL_INTERVAL = "apoc.kafka.sink.poll.interval"; + private static final String INSTANCE_WAIT_TIMEOUT = "apoc.kafka.wait.timeout"; + private static final long INSTANCE_WAIT_TIMEOUT_VALUE = 120000L; + private static final int DEFAULT_TRIGGER_PERIOD = 10000; + private static final String DEFAULT_PATH = "."; + public ExtendedApocConfig(LogService log, GlobalProcedures globalProceduresRegistry, String defaultConfigPath) { this.log = log.getInternalLog(ApocConfig.class); 
this.defaultConfigPath = defaultConfigPath; diff --git a/extended/src/main/java/apoc/ExtendedApocGlobalComponents.java b/extended/src/main/java/apoc/ExtendedApocGlobalComponents.java index 3ef25bb5b3..9c84ce2978 100644 --- a/extended/src/main/java/apoc/ExtendedApocGlobalComponents.java +++ b/extended/src/main/java/apoc/ExtendedApocGlobalComponents.java @@ -12,13 +12,19 @@ import org.neo4j.kernel.availability.AvailabilityListener; import org.neo4j.kernel.internal.GraphDatabaseAPI; import org.neo4j.kernel.lifecycle.Lifecycle; +import org.neo4j.logging.Log; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import static apoc.ExtendedApocConfig.APOC_KAFKA_ENABLED; + @ServiceProvider public class ExtendedApocGlobalComponents implements ApocGlobalComponents { @@ -37,31 +43,55 @@ public Map getServices(GraphDatabaseAPI db, ApocExtensionFact ); cypherProcedureHandlers.put(db, cypherProcedureHandler); - return Map.of( + Map serviceMap = new HashMap<>(); + serviceMap.put("ttl", new TTLLifeCycle(dependencies.scheduler(), + db, + TTLConfig.ttlConfig(), + dependencies.log().getUserLog(TTLLifeCycle.class))); - "ttl", new TTLLifeCycle(dependencies.scheduler(), - db, - TTLConfig.ttlConfig(), - dependencies.log().getUserLog(TTLLifeCycle.class)), + serviceMap.put("uuid", new UuidHandler(db, + dependencies.databaseManagementService(), + dependencies.log().getUserLog(Uuid.class), + dependencies.apocConfig(), + dependencies.scheduler(), + dependencies.pools())); - "uuid", new UuidHandler(db, - dependencies.databaseManagementService(), - dependencies.log().getUserLog(Uuid.class), - dependencies.apocConfig(), - dependencies.scheduler(), - dependencies.pools()), + serviceMap.put("directory", new LoadDirectoryHandler(db, + dependencies.log().getUserLog(LoadDirectory.class), + dependencies.pools())); - 
"directory", new LoadDirectoryHandler(db, - dependencies.log().getUserLog(LoadDirectory.class), - dependencies.pools()), + serviceMap.put("cypherProcedures", cypherProcedureHandler); + + if (dependencies.apocConfig().getBoolean(APOC_KAFKA_ENABLED)) { + try { + Class kafkaHandlerClass = Class.forName("apoc.kafka.KafkaHandler"); + Lifecycle kafkaHandler = (Lifecycle) kafkaHandlerClass + .getConstructor(GraphDatabaseAPI.class, Log.class) + .newInstance(db, dependencies.log().getUserLog(kafkaHandlerClass)); + + serviceMap.put("kafkaHandler", kafkaHandler); + } catch (Exception e) { + dependencies.log().getUserLog(ExtendedApocGlobalComponents.class) + .warn(""" + Cannot find the Kafka extra jar. + Please put the apoc-kafka-dependencies-5.x.x-all.jar into plugin folder. + See the documentation: https://neo4j.com/labs/apoc/5/overview/apoc.kafka""", e); + } + } + + return serviceMap; - "cypherProcedures", cypherProcedureHandler - ); } @Override public Collection getContextClasses() { - return List.of(CypherProceduresHandler.class, UuidHandler.class, LoadDirectoryHandler.class); + List contextClasses = new ArrayList<>( + Arrays.asList(CypherProceduresHandler.class, UuidHandler.class, LoadDirectoryHandler.class) + ); + try { + contextClasses.add(Class.forName("apoc.kafka.KafkaHandler")); + } catch (ClassNotFoundException ignored) {} + return contextClasses; } @Override @@ -69,4 +99,4 @@ public Iterable getListeners(GraphDatabaseAPI db, ApocExte CypherProceduresHandler cypherProceduresHandler = cypherProcedureHandlers.get(db); return cypherProceduresHandler==null ?
Collections.emptyList() : Collections.singleton(cypherProceduresHandler); } -} +} \ No newline at end of file diff --git a/extended/src/main/java/apoc/generate/Generate.java b/extended/src/main/java/apoc/generate/Generate.java index 16ca04011e..0a4005ffe5 100644 --- a/extended/src/main/java/apoc/generate/Generate.java +++ b/extended/src/main/java/apoc/generate/Generate.java @@ -70,7 +70,7 @@ public void complete(@Name("noNodes") Long noNodes, @Name("label") String label, @Procedure(name = "apoc.generate.simple",mode = Mode.WRITE) @Description("apoc.generate.simple(degrees, label, type) - generates a simple random graph according to the given degree distribution") public void simple(@Name("degrees") List degrees, @Name("label") String label, @Name("type") String relationshipType) throws IOException { - if (degrees == null) degrees = Arrays.asList(2L, 2L, 2L, 2L); + if (degrees == null) degrees = java.util.Arrays.asList(2L, 2L, 2L, 2L); List intDegrees = degrees.stream().map(Long::intValue).collect(Collectors.toList()); diff --git a/extended/src/main/java/apoc/load/Jdbc.java b/extended/src/main/java/apoc/load/Jdbc.java index d198ac26cb..4a0f14260d 100644 --- a/extended/src/main/java/apoc/load/Jdbc.java +++ b/extended/src/main/java/apoc/load/Jdbc.java @@ -103,7 +103,7 @@ private Stream executeQuery(String urlOrKey, String tableOrSelect, Ma } } - @Procedure(mode = Mode.DBMS) + @Procedure(mode = Mode.WRITE) @Description("apoc.load.jdbcUpdate('key or url','statement',[params],config) YIELD row - update relational database, from a SQL statement with optional parameters") public Stream jdbcUpdate(@Name("jdbc") String urlOrKey, @Name("query") String query, @Name(value = "params", defaultValue = "[]") List params, @Name(value = "config",defaultValue = "{}") Map config) { log.info( String.format( "Executing SQL update: %s", query ) ); diff --git a/extended/src/main/kotlin/apoc/kafka/KafkaHandler.kt b/extended/src/main/kotlin/apoc/kafka/KafkaHandler.kt new file mode 100644 
index 0000000000..973950676b --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/KafkaHandler.kt @@ -0,0 +1,50 @@ +package apoc.kafka + +import apoc.ApocConfig +import apoc.ExtendedApocConfig.APOC_KAFKA_ENABLED +import apoc.kafka.config.StreamsConfig +import apoc.kafka.consumer.StreamsSinkConfigurationListener +import apoc.kafka.producer.StreamsRouterConfigurationListener +import org.neo4j.kernel.internal.GraphDatabaseAPI +import org.neo4j.kernel.lifecycle.LifecycleAdapter +import org.neo4j.logging.Log + +class KafkaHandler(): LifecycleAdapter() { + + private lateinit var db: GraphDatabaseAPI + private lateinit var log: Log + + constructor(db: GraphDatabaseAPI, log: Log) : this() { + this.db = db + this.log = log + } + + override fun start() { + if(ApocConfig.apocConfig().getBoolean(APOC_KAFKA_ENABLED)) { +// println("start db......") + + try { + StreamsRouterConfigurationListener(db, log) + .start(StreamsConfig.getConfiguration()) + } catch (e: Exception) { + log.error("Exception in StreamsRouterConfigurationListener", e) // log the throwable itself so the cause and stack trace are kept + } + + try { + StreamsSinkConfigurationListener(db, log) + .start(StreamsConfig.getConfiguration()) + } catch (e: Exception) { + log.error("Exception in StreamsSinkConfigurationListener", e) // log the throwable itself so the cause and stack trace are kept + } + } + } + + override fun stop() { + if(ApocConfig.apocConfig().getBoolean(APOC_KAFKA_ENABLED)) { +// println("stop db..........") + + StreamsRouterConfigurationListener(db, log).shutdown() + StreamsSinkConfigurationListener(db, log).shutdown() + } + } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/Neo4jStreamsStrategyStorage.kt b/extended/src/main/kotlin/apoc/kafka/Neo4jStreamsStrategyStorage.kt new file mode 100644 index 0000000000..f415f5cfcd --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/Neo4jStreamsStrategyStorage.kt @@ -0,0 +1,43 @@ +//package apoc.kafka +// +//import apoc.kafka.consumer.StreamsSinkConfiguration +//import apoc.kafka.consumer.StreamsTopicService +//import
apoc.kafka.extensions.isDefaultDb +//import apoc.kafka.service.StreamsStrategyStorage +//import apoc.kafka.service.TopicType +//import apoc.kafka.service.sink.strategy.* +//import org.neo4j.graphdb.GraphDatabaseService +// +//class Neo4jStreamsStrategyStorage(private val streamsTopicService: StreamsTopicService, +// private val streamsConfig: Map, +// private val db: GraphDatabaseService): StreamsStrategyStorage() { +// +// override fun getTopicType(topic: String): TopicType? { +// return streamsTopicService.getTopicType(topic) +// } +// +// private fun getTopicsByTopicType(topicType: TopicType): T = streamsTopicService.getByTopicType(topicType) as T +// +// override fun getStrategy(topic: String): IngestionStrategy = when (val topicType = getTopicType(topic)) { +// TopicType.CDC_SOURCE_ID -> { +// val strategyConfig = StreamsSinkConfiguration +// .createSourceIdIngestionStrategyConfig(streamsConfig, db.databaseName(), db.isDefaultDb()) +// SourceIdIngestionStrategy(strategyConfig) +// } +// TopicType.CDC_SCHEMA -> SchemaIngestionStrategy() +// TopicType.CUD -> CUDIngestionStrategy() +// TopicType.PATTERN_NODE -> { +// val map = getTopicsByTopicType>(topicType) +// NodePatternIngestionStrategy(map.getValue(topic)) +// } +// TopicType.PATTERN_RELATIONSHIP -> { +// val map = getTopicsByTopicType>(topicType) +// RelationshipPatternIngestionStrategy(map.getValue(topic)) +// } +// TopicType.CYPHER -> { +// CypherTemplateStrategy(streamsTopicService.getCypherTemplate(topic)!!) 
+// } +// else -> throw RuntimeException("Topic Type not Found") +// } +// +//} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/PublishProcedures.kt b/extended/src/main/kotlin/apoc/kafka/PublishProcedures.kt new file mode 100644 index 0000000000..111a736e76 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/PublishProcedures.kt @@ -0,0 +1,111 @@ +package apoc.kafka + +import apoc.kafka.producer.StreamsEventRouter +//import apoc.kafka.producer.StreamsTransactionEventHandler +import apoc.kafka.producer.events.StreamsEventBuilder +import apoc.kafka.utils.KafkaUtil +import apoc.kafka.utils.KafkaUtil.checkEnabled +import kotlinx.coroutines.runBlocking +import org.neo4j.kernel.internal.GraphDatabaseAPI +import org.neo4j.logging.Log +import org.neo4j.procedure.Context +import org.neo4j.procedure.Description +import org.neo4j.procedure.Mode +import org.neo4j.procedure.Name +import org.neo4j.procedure.Procedure +import java.util.concurrent.ConcurrentHashMap +import java.util.stream.Stream + +data class StreamPublishResult(@JvmField val value: Map) + +data class StreamsEventSinkStoreEntry(val eventRouter: StreamsEventRouter, + // val txHandler: StreamsTransactionEventHandler +) +class PublishProcedures { + + @JvmField @Context + var db: GraphDatabaseAPI? = null + + @JvmField @Context var log: Log? = null + + @Procedure(mode = Mode.READ, name = "apoc.kafka.publish.sync") + @Description("apoc.kafka.publish.sync(topic, payload, config) - Allows custom synchronous streaming from Neo4j to the configured stream environment") + fun sync(@Name("topic") topic: String?, @Name("payload") payload: Any?, + @Name(value = "config", defaultValue = "{}") config: Map?): Stream { + checkEnabled() + if (isTopicNullOrEmpty(topic)) { + return Stream.empty() + } + checkPayloadNotNull(payload) + + val streamsEvent = buildStreamEvent(topic!!, payload!!) 
+ return getStreamsEventSinkStoreEntry().eventRouter + .sendEventsSync(topic, listOf(streamsEvent), config ?: emptyMap()) + .map { StreamPublishResult(it) } + .stream() + } + + @Procedure(mode = Mode.READ, name = "apoc.kafka.publish") + @Description("apoc.kafka.publish(topic, payload, config) - Allows custom streaming from Neo4j to the configured stream environment") + fun publish(@Name("topic") topic: String?, @Name("payload") payload: Any?, + @Name(value = "config", defaultValue = "{}") config: Map?) = runBlocking { + checkEnabled() + if (isTopicNullOrEmpty(topic)) { + return@runBlocking + } + checkPayloadNotNull(payload) + + val streamsEvent = buildStreamEvent(topic!!, payload!!) + getStreamsEventSinkStoreEntry().eventRouter.sendEvents(topic, listOf(streamsEvent), config ?: emptyMap()) + } + + private fun isTopicNullOrEmpty(topic: String?): Boolean { + return if (topic.isNullOrEmpty()) { + log?.info("Topic empty, no message sent") + true + } else { + false + } + } + + private fun checkPayloadNotNull(payload: Any?) { + if (payload == null) { + log?.error("Payload empty, no message sent") + throw RuntimeException("Payload may not be null") + } + } + + private fun buildStreamEvent(topic: String, payload: Any) = StreamsEventBuilder() + .withPayload(payload) + .withNodeRoutingConfiguration(getStreamsEventSinkStoreEntry() + .eventRouter + .eventRouterConfiguration + .nodeRouting + .firstOrNull { it.topic == topic }) + .withRelationshipRoutingConfiguration(getStreamsEventSinkStoreEntry() + .eventRouter + .eventRouterConfiguration + .relRouting + .firstOrNull { it.topic == topic }) + .withTopic(topic) + .build() + + private fun getStreamsEventSinkStoreEntry() = streamsEventRouterStore[db!!.databaseName()]!! 
+ + companion object { + + private val streamsEventRouterStore = ConcurrentHashMap() + + fun register( + db: GraphDatabaseAPI, + evtRouter: StreamsEventRouter, +// txHandler: StreamsTransactionEventHandler + ) { + streamsEventRouterStore[KafkaUtil.getName(db)] = StreamsEventSinkStoreEntry(evtRouter) + } + + fun unregister(db: GraphDatabaseAPI) { + streamsEventRouterStore.remove(KafkaUtil.getName(db)) + } + } +} diff --git a/extended/src/main/kotlin/apoc/kafka/config/StreamsConfig.kt b/extended/src/main/kotlin/apoc/kafka/config/StreamsConfig.kt new file mode 100644 index 0000000000..67a9b07e7d --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/config/StreamsConfig.kt @@ -0,0 +1,62 @@ +package apoc.kafka.config + +import apoc.ApocConfig +import org.apache.commons.configuration2.ConfigurationMap +import org.apache.kafka.clients.consumer.ConsumerConfig + +class StreamsConfig { + + companion object { + + fun getConfiguration(additionalConfigs: Map = emptyMap()): Map { + val config = ApocConfig.apocConfig().config + + val map = ConfigurationMap(config) + .filter { it.value is String } + .toMutableMap() as Map + return convert(map, additionalConfigs) + } + + const val SOURCE_ENABLED = "apoc.kafka.source.enabled" + const val SOURCE_ENABLED_VALUE = true + const val PROCEDURES_ENABLED = "apoc.kafka.procedures.enabled" + const val PROCEDURES_ENABLED_VALUE = true + const val SINK_ENABLED = "apoc.kafka.sink.enabled" + const val SINK_ENABLED_VALUE = false + const val CHECK_APOC_TIMEOUT = "apoc.kafka.check.apoc.timeout" + const val CHECK_APOC_INTERVAL = "apoc.kafka.check.apoc.interval" + const val CLUSTER_ONLY = "apoc.kafka.cluster.only" + const val CHECK_WRITEABLE_INSTANCE_INTERVAL = "apoc.kafka.check.writeable.instance.interval" + const val POLL_INTERVAL = "apoc.kafka.sink.poll.interval" + const val INSTANCE_WAIT_TIMEOUT = "apoc.kafka.wait.timeout" + const val INSTANCE_WAIT_TIMEOUT_VALUE = 120000L + + fun isSourceGloballyEnabled(config: Map) = 
config.getOrDefault(SOURCE_ENABLED, SOURCE_ENABLED_VALUE).toString().toBoolean() + + fun isSourceEnabled(config: Map, dbName: String) = config.getOrDefault("${SOURCE_ENABLED}.from.$dbName", isSourceGloballyEnabled(config)).toString().toBoolean() + + fun hasProceduresGloballyEnabled(config: Map) = config.getOrDefault(PROCEDURES_ENABLED, PROCEDURES_ENABLED_VALUE).toString().toBoolean() + + fun hasProceduresEnabled(config: Map, dbName: String) = config.getOrDefault("${PROCEDURES_ENABLED}.$dbName", hasProceduresGloballyEnabled(config)).toString().toBoolean() + + fun isSinkGloballyEnabled(config: Map) = config.getOrDefault(SINK_ENABLED, SINK_ENABLED_VALUE).toString().toBoolean() + + fun isSinkEnabled(config: Map, dbName: String) = config.getOrDefault("${SINK_ENABLED}.to.$dbName", isSinkGloballyEnabled(config)).toString().toBoolean() + + fun getInstanceWaitTimeout(config: Map) = config.getOrDefault(INSTANCE_WAIT_TIMEOUT, INSTANCE_WAIT_TIMEOUT_VALUE).toString().toLong() + + fun convert(props: Map, config: Map): Map { + val mutProps = props.toMutableMap() + val mappingKeys = mapOf( + "broker" to "apoc.kafka.${ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG}", + "from" to "apoc.kafka.${ConsumerConfig.AUTO_OFFSET_RESET_CONFIG}", + "autoCommit" to "apoc.kafka.${ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG}", + "keyDeserializer" to "apoc.kafka.${ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG}", + "valueDeserializer" to "apoc.kafka.${ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG}", + "schemaRegistryUrl" to "apoc.kafka.schema.registry.url", + "groupId" to "apoc.kafka.${ConsumerConfig.GROUP_ID_CONFIG}") + mutProps += config.mapKeys { mappingKeys.getOrDefault(it.key, it.key) } + return mutProps + } + } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/Neo4jStreamsStrategyStorage.kt b/extended/src/main/kotlin/apoc/kafka/consumer/Neo4jStreamsStrategyStorage.kt new file mode 100644 index 0000000000..429f79b7ee --- /dev/null +++ 
b/extended/src/main/kotlin/apoc/kafka/consumer/Neo4jStreamsStrategyStorage.kt @@ -0,0 +1,52 @@ +//package apoc.kafka.consumer +// +//import org.neo4j.graphdb.GraphDatabaseService +//import apoc.kafka.extensions.isDefaultDb +//import apoc.kafka.service.StreamsStrategyStorage +//import apoc.kafka.service.TopicType +//import apoc.kafka.service.sink.strategy.CUDIngestionStrategy +//import apoc.kafka.service.sink.strategy.CypherTemplateStrategy +//import apoc.kafka.service.sink.strategy.IngestionStrategy +//import apoc.kafka.service.sink.strategy.NodePatternConfiguration +//import apoc.kafka.service.sink.strategy.NodePatternIngestionStrategy +//import apoc.kafka.service.sink.strategy.RelationshipPatternConfiguration +//import apoc.kafka.service.sink.strategy.RelationshipPatternIngestionStrategy +//import apoc.kafka.service.sink.strategy.SchemaIngestionStrategy +//import apoc.kafka.service.sink.strategy.SourceIdIngestionStrategy +// +//class Neo4jStreamsStrategyStorage(private val streamsTopicService: StreamsTopicService, +// private val streamsConfig: Map, +// private val db: GraphDatabaseService): StreamsStrategyStorage() { +// +// override fun getTopicType(topic: String): TopicType? 
{ +// return streamsTopicService.getTopicType(topic) +// } +// +// private fun getTopicsByTopicType(topicType: TopicType): T = streamsTopicService.getByTopicType(topicType) as T +// +// override fun getStrategy(topic: String): IngestionStrategy = when (val topicType = getTopicType(topic)) { +// TopicType.CDC_SOURCE_ID -> { +// val strategyConfig = StreamsSinkConfiguration.createSourceIdIngestionStrategyConfig( +// streamsConfig, +// db.databaseName(), +// db.isDefaultDb() +// ) +// SourceIdIngestionStrategy(strategyConfig) +// } +// TopicType.CDC_SCHEMA -> SchemaIngestionStrategy() +// TopicType.CUD -> CUDIngestionStrategy() +// TopicType.PATTERN_NODE -> { +// val map = getTopicsByTopicType>(topicType) +// NodePatternIngestionStrategy(map.getValue(topic)) +// } +// TopicType.PATTERN_RELATIONSHIP -> { +// val map = getTopicsByTopicType>(topicType) +// RelationshipPatternIngestionStrategy(map.getValue(topic)) +// } +// TopicType.CYPHER -> { +// CypherTemplateStrategy(streamsTopicService.getCypherTemplate(topic)!!) 
+// } +// else -> throw RuntimeException("Topic Type not Found") +// } +// +//} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/StreamsEventConsumer.kt b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsEventConsumer.kt new file mode 100644 index 0000000000..cdce9d7da2 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsEventConsumer.kt @@ -0,0 +1,24 @@ +package apoc.kafka.consumer + +import org.neo4j.logging.Log +import apoc.kafka.service.StreamsSinkEntity + + +abstract class StreamsEventConsumer(log: Log, topics: Set) { + + abstract fun stop() + + abstract fun start() + + abstract fun read(topicConfig: Map = emptyMap(), action: (String, List) -> Unit) + + abstract fun read(action: (String, List) -> Unit) + + fun invalidTopics(): List = emptyList() + +} + + +abstract class StreamsEventConsumerFactory { + abstract fun createStreamsEventConsumer(config: Map, log: Log, topics: Set): StreamsEventConsumer +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/StreamsEventSink.kt b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsEventSink.kt new file mode 100644 index 0000000000..60dbb62c5e --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsEventSink.kt @@ -0,0 +1,29 @@ +package apoc.kafka.consumer + +import apoc.kafka.consumer.kafka.KafkaEventSink +import org.neo4j.kernel.internal.GraphDatabaseAPI +import org.neo4j.logging.Log +import apoc.kafka.events.StreamsPluginStatus + +object StreamsEventSinkFactory { + fun getStreamsEventSink(config: Map, //streamsQueryExecution: StreamsEventSinkQueryExecution, + /* streamsTopicService: StreamsTopicService, */log: Log, db: GraphDatabaseAPI): KafkaEventSink { +// return Class.forName(config.getOrDefault("apoc.kafka.sink", "apoc.kafka.consumer.kafka.KafkaEventSink")) +// .getConstructor(Map::class.java, +// StreamsEventSinkQueryExecution::class.java, +// StreamsTopicService::class.java, +// Log::class.java, +// 
GraphDatabaseAPI::class.java) +// .newInstance(config, streamsQueryExecution, streamsTopicService, log, db) + return KafkaEventSink(/*config, streamsQueryExecution, streamsTopicService, log, */db) + } +} + +open class StreamsEventSinkConfigMapper(private val streamsConfigMap: Map, private val mappingKeys: Map) { + open fun convert(config: Map): Map { + val props = streamsConfigMap + .toMutableMap() + props += config.mapKeys { mappingKeys.getOrDefault(it.key, it.key) } + return props + } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/StreamsEventSinkQueryExecution.kt b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsEventSinkQueryExecution.kt new file mode 100644 index 0000000000..0220ae54c6 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsEventSinkQueryExecution.kt @@ -0,0 +1,32 @@ +package apoc.kafka.consumer + +import org.neo4j.kernel.internal.GraphDatabaseAPI +import org.neo4j.logging.Log +import apoc.kafka.extensions.execute +import apoc.kafka.service.StreamsSinkService +import apoc.kafka.service.StreamsStrategyStorage +import apoc.kafka.consumer.utils.ConsumerUtils + +class NotInWriteableInstanceException(message: String): RuntimeException(message) + +class StreamsEventSinkQueryExecution(private val db: GraphDatabaseAPI, + private val log: Log, + streamsStrategyStorage: StreamsStrategyStorage): + StreamsSinkService(streamsStrategyStorage) { + + override fun write(query: String, params: Collection) { + if (params.isEmpty()) return + if (ConsumerUtils.isWriteableInstance(db)) { + db.execute(query, mapOf("events" to params)) { + if (log.isDebugEnabled) { + log.debug("Query statistics:\n${it.queryStatistics}") + } + } + } else { + if (log.isDebugEnabled) { + log.debug("Not writeable instance") + } + throw NotInWriteableInstanceException("Not writeable instance") // surface the skipped write to the caller instead of dropping the events silently + } + } +} diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/StreamsSinkConfiguration.kt
b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsSinkConfiguration.kt new file mode 100644 index 0000000000..e01d542763 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsSinkConfiguration.kt @@ -0,0 +1,97 @@ +//package apoc.kafka.consumer +// +//import apoc.kafka.config.StreamsConfig +//import apoc.kafka.extensions.toPointCase +//import apoc.kafka.utils.JSONUtils +//import apoc.kafka.service.TopicUtils +//import apoc.kafka.service.TopicValidationException +//import apoc.kafka.service.Topics +//import apoc.kafka.service.sink.strategy.SourceIdIngestionStrategyConfig +//import java.util.concurrent.TimeUnit +// +//data class StreamsSinkConfiguration(val enabled: Boolean = StreamsConfig.SINK_ENABLED_VALUE, +// val proceduresEnabled: Boolean = StreamsConfig.PROCEDURES_ENABLED_VALUE, +// val topics: Topics = Topics(), +// val errorConfig: Map = emptyMap(), +// val checkApocTimeout: Long = -1, +// val checkApocInterval: Long = 1000, +// val clusterOnly: Boolean = false, +// val checkWriteableInstanceInterval: Long = TimeUnit.MINUTES.toMillis(3), +// val pollInterval: Long = TimeUnit.SECONDS.toMillis(0), +// val sourceIdStrategyConfig: SourceIdIngestionStrategyConfig = SourceIdIngestionStrategyConfig()) { +// +// fun asMap(): Map { +// val configMap = JSONUtils.asMap(this) +// .filterKeys { it != "topics" && it != "enabled" && it != "proceduresEnabled" && !it.startsWith("check") } +// .mapKeys { it.key.toPointCase() } +// .mapKeys { +// when (it.key) { +// "error.config" -> "apoc.kafka.sink.errors" +// "procedures.enabled" -> "apoc.kafka.${it.key}" +// "cluster.only" -> "apoc.kafka.${it.key}" +// else -> if (it.key.startsWith("apoc.kafka.sink")) it.key else "apoc.kafka.sink.${it.key}" +// } +// } +// val topicMap = this.topics.asMap() +// .mapKeys { it.key.key } +// val invalidTopics = mapOf("invalid_topics" to this.topics.invalid) +// return (configMap + topicMap + invalidTopics) +// } +// +// companion object { +// fun from(configMap: Map, 
dbName: String, invalidTopics: List = emptyList(), isDefaultDb: Boolean): StreamsSinkConfiguration { +// val default = StreamsSinkConfiguration() +// +// var topics = Topics.from(map = configMap, dbName = dbName, invalidTopics = invalidTopics) +// if (isDefaultDb) { +// topics += Topics.from(map = configMap, invalidTopics = invalidTopics) +// } +// +// TopicUtils.validate(topics) +// +// val sourceIdStrategyConfig = createSourceIdIngestionStrategyConfig(configMap, dbName, isDefaultDb) +// +// val errorHandler = configMap +// .filterKeys { it.startsWith("apoc.kafka.sink.error") } +// .mapKeys { it.key.substring("apoc.kafka.sink.".length) } +// +// +// return default.copy(enabled = StreamsConfig.isSinkEnabled(configMap, dbName), +// proceduresEnabled = StreamsConfig.hasProceduresEnabled(configMap, dbName), +// topics = topics, +// errorConfig = errorHandler, +// checkApocTimeout = configMap.getOrDefault(StreamsConfig.CHECK_APOC_TIMEOUT, +// default.checkApocTimeout) +// .toString() +// .toLong(), +// checkApocInterval = configMap.getOrDefault(StreamsConfig.CHECK_APOC_INTERVAL, +// default.checkApocInterval) +// .toString() +// .toLong(), +// checkWriteableInstanceInterval = configMap.getOrDefault(StreamsConfig.CHECK_WRITEABLE_INSTANCE_INTERVAL, +// default.checkWriteableInstanceInterval) +// .toString().toLong(), +// pollInterval = configMap.getOrDefault(StreamsConfig.POLL_INTERVAL, default.pollInterval) +// .toString().toLong(), +// clusterOnly = configMap.getOrDefault(StreamsConfig.CLUSTER_ONLY, +// default.clusterOnly) +// .toString().toBoolean(), +// sourceIdStrategyConfig = sourceIdStrategyConfig) +// } +// +// fun createSourceIdIngestionStrategyConfig(configMap: Map, dbName: String, isDefaultDb: Boolean): SourceIdIngestionStrategyConfig { +// val sourceIdStrategyConfigPrefix = "apoc.kafka.sink.topic.cdc.sourceId" +// val (sourceIdStrategyLabelNameKey, sourceIdStrategyIdNameKey) = if (isDefaultDb) { +// "labelName" to "idName" +// } else { +// 
"labelName.to.$dbName" to "idName.to.$dbName" +// } +// val defaultSourceIdStrategyConfig = SourceIdIngestionStrategyConfig() +// return SourceIdIngestionStrategyConfig( +// configMap.getOrDefault("$sourceIdStrategyConfigPrefix.$sourceIdStrategyLabelNameKey", defaultSourceIdStrategyConfig.labelName), +// configMap.getOrDefault("$sourceIdStrategyConfigPrefix.$sourceIdStrategyIdNameKey", defaultSourceIdStrategyConfig.idName)) +// } +// +// } +// +//} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/StreamsSinkConfigurationListener.kt b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsSinkConfigurationListener.kt new file mode 100644 index 0000000000..cd19c6a9c7 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsSinkConfigurationListener.kt @@ -0,0 +1,100 @@ +package apoc.kafka.consumer + +import apoc.kafka.config.StreamsConfig +import apoc.kafka.consumer.kafka.KafkaEventSink +import apoc.kafka.consumer.kafka.KafkaSinkConfiguration +import apoc.kafka.consumer.procedures.StreamsSinkProcedures +import apoc.kafka.consumer.utils.ConsumerUtils +import apoc.kafka.extensions.isDefaultDb +import apoc.kafka.utils.KafkaUtil +import apoc.kafka.utils.KafkaUtil.getProducerProperties +import kotlinx.coroutines.sync.Mutex +import org.neo4j.kernel.internal.GraphDatabaseAPI +import org.neo4j.logging.Log + +class StreamsSinkConfigurationListener(private val db: GraphDatabaseAPI, + private val log: Log) { + +// private val mutex = Mutex() +// + var eventSink: KafkaEventSink? = null +// +// private val streamsTopicService = StreamsTopicService() +// +// private var lastConfig: KafkaSinkConfiguration? 
= null +// +// private val producerConfig = getProducerProperties() +// +// private fun KafkaSinkConfiguration.excludeSourceProps() = this.asProperties() +// ?.filterNot { producerConfig.contains(it.key) || it.key.toString().startsWith("apoc.kafka.source") } + + + fun shutdown() { +// val isShuttingDown = eventSink != null +// if (isShuttingDown) { +// log.info("[Sink] Shutting down the Streams Sink Module") +// } +// eventSink?.stop() +// eventSink = null + StreamsSinkProcedures.unregisterStreamsEventSink(db) +// if (isShuttingDown) { +// log.info("[Sink] Shutdown of the Streams Sink Module completed") +// } + } + + fun start(configMap: Map) { +// lastConfig = KafkaSinkConfiguration.create(StreamsConfig.getConfiguration(), db.databaseName(), db.isDefaultDb()) +// val streamsSinkConfiguration = lastConfig!!.sinkConfiguration +// streamsTopicService.clearAll() +// streamsTopicService.setAll(streamsSinkConfiguration.topics) +// +// val neo4jStrategyStorage = Neo4jStreamsStrategyStorage(streamsTopicService, configMap, db) +// val streamsQueryExecution = StreamsEventSinkQueryExecution(db, +// log, neo4jStrategyStorage) +// + eventSink = StreamsEventSinkFactory + .getStreamsEventSink(configMap, + // streamsQueryExecution, + // streamsTopicService, + log, + db) +// try { +// if (streamsSinkConfiguration.enabled) { +// log.info("[Sink] The Streams Sink module is starting") +// if (KafkaUtil.isCluster(db)) { +// initSinkModule(streamsSinkConfiguration) +// } else { +// runInASingleInstance(streamsSinkConfiguration) +// } +// } +// } catch (e: Exception) { +// log.warn("Cannot start the Streams Sink module because the following exception", e) +// } +// +// log.info("[Sink] Registering the Streams Sink procedures") + StreamsSinkProcedures.registerStreamsEventSink(db, eventSink!!) 
+ } + +// private fun initSink() { +// eventSink?.start() +// eventSink?.printInvalidTopics() +// } +// +// private fun runInASingleInstance(streamsSinkConfiguration: StreamsSinkConfiguration) { +// // check if is writeable instance +// ConsumerUtils.executeInWriteableInstance(db) { +// if (streamsSinkConfiguration.clusterOnly) { +// log.info(""" +// |Cannot init the Streams Sink module as is forced to work only in a cluster env, +// |please check the value of `${StreamsConfig.CLUSTER_ONLY}` +// """.trimMargin()) +// } else { +// initSinkModule(streamsSinkConfiguration) +// } +// } +// } +// +// private fun initSinkModule(streamsSinkConfiguration: StreamsSinkConfiguration) { +// initSink() +// } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/StreamsTopicService.kt b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsTopicService.kt new file mode 100644 index 0000000000..f6fae70065 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/StreamsTopicService.kt @@ -0,0 +1,95 @@ +package apoc.kafka.consumer + +import kotlinx.coroutines.runBlocking +import kotlinx.coroutines.sync.Mutex +import kotlinx.coroutines.sync.withLock +import apoc.kafka.service.TopicType +import apoc.kafka.service.Topics +import java.util.Collections +import java.util.concurrent.ConcurrentHashMap + +class StreamsTopicService { + + private val storage = ConcurrentHashMap() + + private val mutex = Mutex() + + fun clearAll() { + storage.clear() + } + + private fun throwRuntimeException(data: Any, topicType: TopicType): Unit = + throw RuntimeException("Unsupported data $data for topic type $topicType") + + fun set(topicType: TopicType, data: Any) = runBlocking { + mutex.withLock { + var oldData = storage[topicType] + oldData = oldData ?: when (data) { + is Map<*, *> -> emptyMap() + is Collection<*> -> emptyList() + else -> throwRuntimeException(data, topicType) + } + val newData = when (oldData) { + is Map<*, *> -> oldData + (data as Map) + is 
Collection<*> -> oldData + (data as Collection) + else -> throwRuntimeException(data, topicType) + } + storage[topicType] = newData + } + } + + fun remove(topicType: TopicType, topic: String) = runBlocking { + mutex.withLock { + val topicData = storage[topicType] ?: return@runBlocking + + val runtimeException = RuntimeException("Unsupported data $topicData for topic type $topicType") + val filteredData = when (topicData) { + is Map<*, *> -> topicData.filterKeys { it.toString() != topic } + is Collection<*> -> topicData.filter { it.toString() != topic } + else -> throw runtimeException + } + + storage[topicType] = filteredData + } + } + + fun getTopicType(topic: String) = runBlocking { + TopicType.values() + .find { + mutex.withLock { + when (val topicData = storage[it]) { + is Map<*, *> -> topicData.containsKey(topic) + is Collection<*> -> topicData.contains(topic) + else -> false + } + } + } + } + + fun getTopics() = runBlocking { + TopicType.values() + .flatMap { + mutex.withLock { + when (val data = storage[it]) { + is Map<*, *> -> data.keys + is Collection<*> -> data.toSet() + else -> emptySet() + } + } + }.toSet() as Set + } + + fun setAll(topics: Topics) { + topics.asMap().forEach { (topicType, data) -> + set(topicType, data) + } + } + + fun getCypherTemplate(topic: String) = (storage.getOrDefault(TopicType.CYPHER, emptyMap()) as Map) + .let { it[topic] } + + fun getAll(): Map = Collections.unmodifiableMap(storage) + + fun getByTopicType(topicType: TopicType): Any? 
= storage[topicType] + +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaAutoCommitEventConsumer.kt b/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaAutoCommitEventConsumer.kt new file mode 100644 index 0000000000..244fc37f4a --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaAutoCommitEventConsumer.kt @@ -0,0 +1,145 @@
+package apoc.kafka.consumer.kafka
+
+import io.confluent.kafka.serializers.KafkaAvroDeserializer
+import org.apache.avro.generic.GenericRecord
+import org.apache.kafka.clients.consumer.ConsumerRecord
+import org.apache.kafka.clients.consumer.KafkaConsumer
+import org.apache.kafka.clients.consumer.OffsetAndMetadata
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.serialization.ByteArrayDeserializer
+import org.neo4j.logging.Log
+import apoc.kafka.consumer.StreamsEventConsumer
+import apoc.kafka.extensions.offsetAndMetadata
+import apoc.kafka.extensions.toStreamsSinkEntity
+import apoc.kafka.service.StreamsSinkEntity
+import apoc.kafka.service.errors.*
+import java.time.Duration
+import java.util.concurrent.atomic.AtomicBoolean
+
+/**
+ * Per-call read settings parsed from a configuration map.
+ *
+ * @property commit value of the `commit` entry (defaults to `true` when absent)
+ * @property topicPartitionsMap start offset for each explicitly requested topic partition
+ */
+data class KafkaTopicConfig(val commit: Boolean, val topicPartitionsMap: Map) {
+    companion object {
+        // Flattens `topic -> [{partition, offset}, ...]` into `TopicPartition -> offset`.
+        // Both values are parsed from their string form, so numeric or string inputs work alike.
+        private fun toTopicPartitionMap(topicConfig: Map>>): Map = topicConfig
+            .flatMap { topicConfigEntry ->
+                topicConfigEntry.value.map {
+                    val partition = it.getValue("partition").toString().toInt()
+                    val offset = it.getValue("offset").toString().toLong()
+                    TopicPartition(topicConfigEntry.key, partition) to offset
+                }
+            }
+            .toMap()
+
+        /**
+         * Builds a [KafkaTopicConfig] from the `commit` and `partitions` entries of [map];
+         * a missing `partitions` entry yields an empty partition map.
+         */
+        fun fromMap(map: Map): KafkaTopicConfig {
+            val commit = map.getOrDefault("commit", true).toString().toBoolean()
+            val topicPartitionsMap = toTopicPartitionMap(map
+                .getOrDefault("partitions", emptyMap>>()) as Map>>)
+            return KafkaTopicConfig(commit = commit, topicPartitionsMap = topicPartitionsMap)
+        }
+    }
+}
+
+/**
+ * Base type of the Kafka-backed consumers: a [StreamsEventConsumer] that can additionally be woken up.
+ */
+abstract class KafkaEventConsumer(config: KafkaSinkConfiguration,
+                                  log: Log,
+                                  topics: Set): StreamsEventConsumer(log, topics) {
+    abstract fun wakeup()
+}
+
+/**
+ * Consumer that polls the subscribed [topics] and hands each batch to a caller-supplied action.
+ * This class never commits offsets itself; failures raised by the action are reported to the
+ * error service instead of being rethrown.
+ */
+open class KafkaAutoCommitEventConsumer(private val config: KafkaSinkConfiguration,
+                                        private val log: Log,
+                                        val topics: Set,
+                                        private val dbName: String): KafkaEventConsumer(config, log, topics) {
+
+    private val errorService: ErrorService = KafkaErrorService(config.asProperties(),
+        ErrorService.ErrorConfig.from(emptyMap()),
+        { s, e -> log.error(s,e as Throwable) })
+
+    // override fun invalidTopics(): List = config.sinkConfiguration.topics.invalid
+
+    // Flipped by the first setSeek() call so the seek is applied only once per instance.
+    private val isSeekSet = AtomicBoolean()
+
+    // Only ByteArray/Avro deserializer combinations are accepted; anything else fails fast.
+    val consumer: KafkaConsumer<*, *> = when {
+        config.keyDeserializer == ByteArrayDeserializer::class.java.name && config.valueDeserializer == ByteArrayDeserializer::class.java.name -> KafkaConsumer(config.asProperties())
+        config.keyDeserializer == ByteArrayDeserializer::class.java.name && config.valueDeserializer == KafkaAvroDeserializer::class.java.name -> KafkaConsumer(config.asProperties())
+        config.keyDeserializer == KafkaAvroDeserializer::class.java.name && config.valueDeserializer == KafkaAvroDeserializer::class.java.name -> KafkaConsumer(config.asProperties())
+        config.keyDeserializer == KafkaAvroDeserializer::class.java.name && config.valueDeserializer == ByteArrayDeserializer::class.java.name -> KafkaConsumer(config.asProperties())
+        else -> throw RuntimeException("Invalid config")
+    }
+
+    /** Subscribes to [topics]; logs and does nothing when no topic is configured. */
+    override fun start() {
+        if (topics.isEmpty()) {
+            log.info("No topics specified Kafka Consumer will not started")
+            return
+        }
+        this.consumer.subscribe(topics)
+    }
+
+    /** Closes the Kafka consumer and the error service. */
+    override fun stop() {
+        try {
+            consumer.close()
+        } finally {
+            // Release the error service even when closing the consumer throws.
+            errorService.close()
+        }
+    }
+
+    // Single non-blocking poll; every subscribed topic's records (possibly none) go to the action.
+    private fun readSimple(action: (String, List) -> Unit) {
+        val records = consumer.poll(Duration.ZERO)
+        if (records.isEmpty) return
+        this.topics.forEach { topic ->
+            val topicRecords = records.records(topic)
+            executeAction(action, topic, topicRecords)
+        }
+    }
+
+    /**
+     * Converts [topicRecords] to sink entities and passes them to [action].
+     * Any exception is caught and every record of the batch is reported to the error service.
+     */
+    fun executeAction(action: (String, List) -> Unit, topic: String, topicRecords: Iterable>) {
+        try {
+            action(topic, topicRecords.map { it.toStreamsSinkEntity() })
+        } catch (e: Exception) {
+            errorService.report(topicRecords.map { ErrorData.from(it, e, this::class.java, dbName) })
+        }
+    }
+
+    /**
+     * Applies the configured start offsets (first call only), polls once, and runs [action] for
+     * every configured partition that received records.
+     *
+     * @return for each such partition, `offsetAndMetadata()` of its last polled record;
+     *         empty when the poll returned nothing
+     */
+    fun readFromPartition(kafkaTopicConfig: KafkaTopicConfig,
+                          action: (String, List) -> Unit): Map {
+        setSeek(kafkaTopicConfig.topicPartitionsMap)
+        val records = consumer.poll(Duration.ZERO)
+        return when (records.isEmpty) {
+            true -> emptyMap()
+            else -> kafkaTopicConfig.topicPartitionsMap
+                .mapValues { records.records(it.key) }
+                .filterValues { it.isNotEmpty() }
+                .mapValues { (topic, topicRecords) ->
+                    executeAction(action, topic.topic(), topicRecords)
+                    topicRecords.last().offsetAndMetadata()
+                }
+        }
+    }
+
+    override fun read(action: (String, List) -> Unit) {
+        readSimple(action)
+    }
+
+    /** Reads from explicit partitions when [topicConfig] names any, otherwise from all subscribed topics. */
+    override fun read(topicConfig: Map, action: (String, List) -> Unit) {
+        val kafkaTopicConfig = KafkaTopicConfig.fromMap(topicConfig)
+        if (kafkaTopicConfig.topicPartitionsMap.isEmpty()) {
+            readSimple(action)
+        } else {
+            readFromPartition(kafkaTopicConfig, action)
+        }
+    }
+
+    // Offset -1 seeks to the beginning, -2 to the end, any other value to that exact offset.
+    private fun setSeek(topicPartitionsMap: Map) {
+        if (!isSeekSet.compareAndSet(false, true)) {
+            return
+        }
+        consumer.poll(0) // dummy call see: https://stackoverflow.com/questions/41008610/kafkaconsumer-0-10-java-api-error-message-no-current-assignment-for-partition
+        topicPartitionsMap.forEach {
+            when (it.value) {
+                -1L -> consumer.seekToBeginning(listOf(it.key))
+                -2L -> consumer.seekToEnd(listOf(it.key))
+                else -> consumer.seek(it.key, it.value)
+            }
+        }
+    }
+
+    override fun wakeup() {
+        consumer.wakeup()
+    }
+}
+
diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaEventSink.kt b/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaEventSink.kt new file mode 100644 index 0000000000..a4c7ced6d5 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaEventSink.kt @@ -0,0 +1,194 @@ +package apoc.kafka.consumer.kafka + +import
apoc.kafka.config.StreamsConfig +import apoc.kafka.consumer.StreamsEventConsumer +import apoc.kafka.consumer.StreamsEventConsumerFactory +import apoc.kafka.consumer.StreamsEventSinkQueryExecution +//import apoc.kafka.consumer.StreamsSinkConfiguration +import apoc.kafka.consumer.StreamsTopicService +import apoc.kafka.consumer.utils.ConsumerUtils +import apoc.kafka.events.StreamsPluginStatus +import apoc.kafka.extensions.isDefaultDb +import apoc.kafka.utils.KafkaUtil +import apoc.kafka.utils.KafkaUtil.getInvalidTopicsError +import kotlinx.coroutines.CancellationException +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.Job +import kotlinx.coroutines.cancelAndJoin +import kotlinx.coroutines.delay +import kotlinx.coroutines.isActive +import kotlinx.coroutines.launch +import kotlinx.coroutines.runBlocking +import kotlinx.coroutines.sync.Mutex +import kotlinx.coroutines.sync.withLock +import org.apache.kafka.common.errors.WakeupException +import org.neo4j.kernel.internal.GraphDatabaseAPI +import org.neo4j.logging.Log + +class KafkaEventSink(//private val config: Map, + //private val queryExecution: StreamsEventSinkQueryExecution, + // private val streamsTopicService: StreamsTopicService, + // private val log: Log, + private val db: GraphDatabaseAPI) { + + private val mutex = Mutex() + + private lateinit var eventConsumer: KafkaEventConsumer + private var job: Job? 
= null + +// val streamsSinkConfiguration: StreamsSinkConfiguration = StreamsSinkConfiguration.from(configMap = config, +// dbName = db.databaseName(), isDefaultDb = db.isDefaultDb()) +// +// private val streamsConfig: StreamsSinkConfiguration = StreamsSinkConfiguration.from(configMap = config, +// dbName = db.databaseName(), isDefaultDb = db.isDefaultDb()) + + fun getEventConsumerFactory(): StreamsEventConsumerFactory { + return object: StreamsEventConsumerFactory() { + override fun createStreamsEventConsumer(config: Map, log: Log, topics: Set): StreamsEventConsumer { + val dbName = db.databaseName() + val kafkaConfig = KafkaSinkConfiguration.from(config, dbName, db.isDefaultDb()) + val topics1 = topics as Set + return if (kafkaConfig.enableAutoCommit) { + KafkaAutoCommitEventConsumer(kafkaConfig, log, topics1, dbName) + } else { + KafkaManualCommitEventConsumer(kafkaConfig, log, topics1, dbName) + } + } + } + } + +// fun start() = runBlocking { // TODO move to the abstract class +// if (streamsConfig.clusterOnly && !KafkaUtil.isCluster(db)) { +// if (log.isDebugEnabled) { +// log.info(""" +// |Cannot init the Kafka Sink module as is forced to work only in a cluster env, +// |please check the value of `${StreamsConfig.CLUSTER_ONLY}` +// """.trimMargin()) +// } +// return@runBlocking +// } +// val topics = streamsTopicService.getTopics() +// val isWriteableInstance = ConsumerUtils.isWriteableInstance(db) +// if (!streamsConfig.enabled) { +// if (topics.isNotEmpty() && isWriteableInstance) { +// log.warn("You configured the following topics: $topics, in order to make the Sink work please set ${StreamsConfig.SINK_ENABLED}=true") +// } +// log.info("The Kafka Sink is disabled") +// return@runBlocking +// } +// if (topics.isEmpty()) { +// if (isWriteableInstance) { +// log.warn("The Kafka Sink will not start because no topics are provided") +// } +// return@runBlocking +// } +// log.info("Starting the Kafka Sink") +// mutex.withLock(job) { +// if 
(StreamsPluginStatus.RUNNING == status(job)) { +// if (log.isDebugEnabled) { +// log.debug("Kafka Sink is already started.") +// } +// return@runBlocking +// } +// try { +// job = createJob(streamsConfig, topics) +// } catch (e: Exception) { +// log.error("The Kafka Sink will not start, cannot create the sink job because of the following exception:", e) +// return@runBlocking +// } +// } +// if (isWriteableInstance) { +// if (log.isDebugEnabled) { +// streamsTopicService.getAll().forEach { +// log.debug("Subscribed topics for type ${it.key} are: ${it.value}") +// } +// } else { +// log.info("Subscribed topics: $topics") +// } +// } else { +// if (log.isDebugEnabled) { +// log.info("Not a writeable instance") +// } +// } +// log.info("Kafka Sink started") +// } +// +// fun stop() = runBlocking { // TODO move to the abstract class +// log.info("Stopping Kafka Sink daemon Job") +// mutex.withLock(job) { +// if (status(job) == StreamsPluginStatus.STOPPED) { +// return@runBlocking +// } +// KafkaUtil.ignoreExceptions({ +// runBlocking { +// if (job?.isActive == true) { +// eventConsumer.wakeup() +// job?.cancelAndJoin() +// } +// log.info("Kafka Sink daemon Job stopped") +// } +// }, UninitializedPropertyAccessException::class.java) +// } +// Unit +// } +// +// private fun createJob(streamsConfig: StreamsSinkConfiguration, topics: Set): Job { +// log.info("Creating Sink daemon Job") +// return GlobalScope.launch(Dispatchers.IO) { // TODO improve exception management +// try { +// eventConsumer = getEventConsumerFactory() +// .createStreamsEventConsumer(config, log, topics) as KafkaEventConsumer +// eventConsumer.start() +// while (isActive) { +// val timeMillis = if (ConsumerUtils.isWriteableInstance(db)) { +// eventConsumer.read { topic, data -> +// if (log.isDebugEnabled) { +// log.debug("Reading data from topic $topic") +// } +// queryExecution.writeForTopic(topic, data) +// } +// streamsConfig.pollInterval +// } else { +// val timeMillis = 
streamsConfig.checkWriteableInstanceInterval +// if (log.isDebugEnabled) { +// log.debug("Not in a writeable instance, new check in $timeMillis millis") +// } +// timeMillis +// } +// delay(timeMillis) +// } +// } catch (e: Exception) { +// when (e) { +// is CancellationException, is WakeupException -> null +// else -> { +// val message = e.message ?: "Generic error, please check the stack trace: " +// log.error(message, e) +// } +// } +// } finally { +// KafkaUtil.ignoreExceptions({ eventConsumer.stop() }, Exception::class.java) +// } +// } +// } +// +// fun printInvalidTopics() { +// KafkaUtil.ignoreExceptions({ +// if (eventConsumer.invalidTopics().isNotEmpty()) { +// log.warn(getInvalidTopicsError(eventConsumer.invalidTopics())) +// } +// }, UninitializedPropertyAccessException::class.java) +// } + + fun status(): StreamsPluginStatus = runBlocking { + mutex.withLock(job) { + status(job) + } + } + + private fun status(job: Job?): StreamsPluginStatus = when (job?.isActive) { + true -> StreamsPluginStatus.RUNNING + else -> StreamsPluginStatus.STOPPED + } + +} diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaManualCommitEventConsumer.kt b/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaManualCommitEventConsumer.kt new file mode 100644 index 0000000000..6871f036f8 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaManualCommitEventConsumer.kt @@ -0,0 +1,118 @@
+package apoc.kafka.consumer.kafka
+
+import org.apache.kafka.clients.consumer.CommitFailedException
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener
+import org.apache.kafka.clients.consumer.OffsetAndMetadata
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.errors.WakeupException
+import org.neo4j.logging.Log
+import apoc.kafka.extensions.offsetAndMetadata
+import apoc.kafka.extensions.topicPartition
+import apoc.kafka.service.StreamsSinkEntity
+import java.time.Duration
+
+/**
+ * Consumer variant that commits offsets explicitly after each read, either synchronously or
+ * asynchronously depending on `config.asyncCommit`.
+ */
+class KafkaManualCommitEventConsumer(config: KafkaSinkConfiguration,
+                                     private val log: Log,
+                                     topics: Set,
+                                     dbName: String): KafkaAutoCommitEventConsumer(config, log, topics, dbName) {
+
+    private val asyncCommit = config.asyncCommit
+
+    /**
+     * In async mode performs one last synchronous commit, then closes the consumer.
+     * The consumer is closed even when that final commit throws; the exception still propagates.
+     */
+    override fun stop() {
+        try {
+            if (asyncCommit) {
+                doCommitSync()
+            }
+        } finally {
+            // doCommitSync() rethrows WakeupException and does not catch other commit errors;
+            // without this the underlying resources would never be released on that path.
+            super.stop()
+        }
+    }
+
+    private fun doCommitSync() {
+        try {
+            /*
+             * While everything is fine, we use commitAsync.
+             * It is faster, and if one commit fails, the next commit will serve as a retry.
+             * But if we are closing, there is no "next commit". We call commitSync(),
+             * because it will retry until it succeeds or suffers unrecoverable failure.
+             */
+            consumer.commitSync()
+        } catch (e: WakeupException) {
+            // we're shutting down, but finish the commit first and then
+            // rethrow the exception so that the main loop can exit
+            doCommitSync()
+            throw e
+        } catch (e: CommitFailedException) {
+            // the commit failed with an unrecoverable error. if there is any
+            // internal state which depended on the commit, you can clean it
+            // up here. otherwise it's reasonable to ignore the error and go on
+            log.warn("Commit failed", e)
+        }
+    }
+
+    /**
+     * In async mode subscribes with a rebalance listener that commits synchronously when
+     * partitions are revoked; otherwise defers to the parent implementation.
+     */
+    override fun start() {
+        if (asyncCommit) {
+            if (topics.isEmpty()) {
+                log.info("No topics specified Kafka Consumer will not started")
+                return
+            }
+            this.consumer.subscribe(topics, object : ConsumerRebalanceListener {
+                override fun onPartitionsRevoked(partitions: Collection) = doCommitSync()
+
+                override fun onPartitionsAssigned(partitions: Collection) {}
+            })
+        } else {
+            super.start()
+        }
+    }
+
+    // Commits the given offsets when requested and non-empty; async failures are only logged.
+    private fun commitData(commit: Boolean, topicMap: Map) {
+        if (commit && topicMap.isNotEmpty()) {
+            if (asyncCommit) {
+                if (log.isDebugEnabled) {
+                    log.debug("Committing data in async")
+                }
+                consumer.commitAsync(topicMap) { offsets: MutableMap, exception: Exception? ->
+                    if (exception != null) {
+                        log.warn("""
+                            |These offsets `$offsets`
+                            |cannot be committed because of the following exception:
+                        """.trimMargin(), exception)
+                    }
+                }
+            } else {
+                if (log.isDebugEnabled) {
+                    log.debug("Committing data in sync")
+                }
+                consumer.commitSync(topicMap)
+            }
+        }
+    }
+
+    /** Polls once, runs [action] per partition and always commits the resulting offsets. */
+    override fun read(action: (String, List) -> Unit) {
+        val topicMap = readSimple(action)
+        commitData(true, topicMap)
+    }
+
+    /** Like the single-argument read, but honours the `commit` and `partitions` entries of [topicConfig]. */
+    override fun read(topicConfig: Map, action: (String, List) -> Unit) {
+        val kafkaTopicConfig = KafkaTopicConfig.fromMap(topicConfig)
+        val topicMap = if (kafkaTopicConfig.topicPartitionsMap.isEmpty()) {
+            readSimple(action)
+        } else {
+            readFromPartition(kafkaTopicConfig, action)
+        }
+        commitData(kafkaTopicConfig.commit, topicMap)
+    }
+
+    // Polls once and returns, per partition that delivered records, the
+    // topicPartition()/offsetAndMetadata() pair of its last record.
+    private fun readSimple(action: (String, List) -> Unit): Map {
+        val records = consumer.poll(Duration.ZERO)
+        return when (records.isEmpty) {
+            true -> emptyMap()
+            else -> records.partitions()
+                .map { topicPartition ->
+                    val topicRecords = records.records(topicPartition)
+                    executeAction(action, topicPartition.topic(), topicRecords)
+                    val last = topicRecords.last()
+                    last.topicPartition() to last.offsetAndMetadata()
+                }
+                .toMap()
+        }
+    }
+}
diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaSinkConfiguration.kt b/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaSinkConfiguration.kt new file mode 100644 index 0000000000..858e039abc --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/kafka/KafkaSinkConfiguration.kt @@ -0,0 +1,100 @@ +package apoc.kafka.consumer.kafka + +import io.confluent.kafka.serializers.KafkaAvroDeserializer +import org.apache.kafka.clients.CommonClientConfigs +import org.apache.kafka.clients.consumer.ConsumerConfig +import org.apache.kafka.common.serialization.ByteArrayDeserializer +//import apoc.kafka.consumer.StreamsSinkConfiguration +import apoc.kafka.extensions.toPointCase +import apoc.kafka.utils.JSONUtils +import
apoc.kafka.utils.KafkaUtil.getInvalidTopics +import apoc.kafka.utils.KafkaUtil.validateConnection +import java.util.Properties + + +private const val kafkaConfigPrefix = "apoc.kafka." + +private val SUPPORTED_DESERIALIZER = listOf(ByteArrayDeserializer::class.java.name, KafkaAvroDeserializer::class.java.name) + +private fun validateDeserializers(config: KafkaSinkConfiguration) { +// val key = if (!SUPPORTED_DESERIALIZER.contains(config.keyDeserializer)) { +// ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG +// } else if (!SUPPORTED_DESERIALIZER.contains(config.valueDeserializer)) { +// ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG +// } else { +// "" +// } +// if (key.isNotBlank()) { +// throw RuntimeException("The property `kafka.$key` contains an invalid deserializer. Supported deserializers are $SUPPORTED_DESERIALIZER") +// } +} + +data class KafkaSinkConfiguration(val bootstrapServers: String = "localhost:9092", + val keyDeserializer: String = "org.apache.kafka.common.serialization.ByteArrayDeserializer", + val valueDeserializer: String = "org.apache.kafka.common.serialization.ByteArrayDeserializer", + val groupId: String = "neo4j", + val autoOffsetReset: String = "earliest", +// val sinkConfiguration: StreamsSinkConfiguration = StreamsSinkConfiguration(), + val enableAutoCommit: Boolean = true, + val asyncCommit: Boolean = false, + val extraProperties: Map = emptyMap()) { + + companion object { + + fun from(cfg: Map, dbName: String, isDefaultDb: Boolean): KafkaSinkConfiguration { + val kafkaCfg = create(cfg, dbName, isDefaultDb) + validate(kafkaCfg) +// val invalidTopics = getInvalidTopics(kafkaCfg.asProperties(), kafkaCfg.sinkConfiguration.topics.allTopics()) +// return if (invalidTopics.isNotEmpty()) { +// kafkaCfg.copy(sinkConfiguration = StreamsSinkConfiguration.from(cfg, dbName, invalidTopics, isDefaultDb)) +// } else { + return kafkaCfg +// } + } + + // Visible for testing + fun create(cfg: Map, dbName: String, isDefaultDb: Boolean): KafkaSinkConfiguration 
{ + val config = cfg + .filterKeys { it.startsWith(kafkaConfigPrefix) && !it.startsWith("${kafkaConfigPrefix}sink") } + .mapKeys { it.key.substring(kafkaConfigPrefix.length) } + val default = KafkaSinkConfiguration() + + val keys = JSONUtils.asMap(default).keys.map { it.toPointCase() } + val extraProperties = config.filterKeys { !keys.contains(it) } + +// val streamsSinkConfiguration = StreamsSinkConfiguration.from(configMap = cfg, dbName = dbName, isDefaultDb = isDefaultDb) + + + return default.copy(keyDeserializer = config.getOrDefault(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, default.keyDeserializer), + valueDeserializer = config.getOrDefault(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, default.valueDeserializer), + bootstrapServers = config.getOrDefault(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, default.bootstrapServers), + autoOffsetReset = config.getOrDefault(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, default.autoOffsetReset), + groupId = config.getOrDefault(ConsumerConfig.GROUP_ID_CONFIG, default.groupId) + (if (isDefaultDb) "" else "-$dbName"), + enableAutoCommit = config.getOrDefault(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, default.enableAutoCommit).toString().toBoolean(), + asyncCommit = config.getOrDefault("async.commit", default.asyncCommit).toString().toBoolean(), +// sinkConfiguration = streamsSinkConfiguration, + extraProperties = extraProperties // for what we don't provide a default configuration + ) + } + + private fun validate(config: KafkaSinkConfiguration) { + validateConnection(config.bootstrapServers, CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, false) + val schemaRegistryUrlKey = "schema.registry.url" + if (config.extraProperties.containsKey(schemaRegistryUrlKey)) { + val schemaRegistryUrl = config.extraProperties.getOrDefault(schemaRegistryUrlKey, "") + validateConnection(schemaRegistryUrl, schemaRegistryUrlKey, false) + } + validateDeserializers(config) + } + } + + fun asProperties(): Properties { + val props = Properties() + val 
map = JSONUtils.asMap(this) + .filterKeys { it != "extraProperties" && it != "sinkConfiguration" } + .mapKeys { it.key.toPointCase() } + props.putAll(map) + props.putAll(extraProperties) + return props + } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/procedures/QueueBasedSpliterator.kt b/extended/src/main/kotlin/apoc/kafka/consumer/procedures/QueueBasedSpliterator.kt new file mode 100644 index 0000000000..315ae49201 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/procedures/QueueBasedSpliterator.kt @@ -0,0 +1,67 @@
+package apoc.kafka.consumer.procedures
+
+import org.neo4j.graphdb.NotInTransactionException
+import org.neo4j.graphdb.TransactionTerminatedException
+import org.neo4j.procedure.TerminationGuard
+import java.util.Spliterator
+import java.util.concurrent.BlockingQueue
+import java.util.concurrent.TimeUnit
+import java.util.function.Consumer
+
+/**
+ * Spliterator that drains a blocking queue one element at a time, always holding the next
+ * element pre-fetched in [entry].
+ *
+ * Iteration ends when the tombstone instance is dequeued, when a poll yields nothing within
+ * `timeout` seconds, when the polling thread is interrupted, or when the termination guard
+ * reports that the transaction is terminated or absent.
+ *
+ * @author mh
+ * @since 08.05.16 in APOC
+ */
+class QueueBasedSpliterator constructor(private val queue: BlockingQueue,
+                                           private val tombstone: T,
+                                           private val terminationGuard: TerminationGuard,
+                                           private val timeout: Long = 10) : Spliterator {
+    private var entry: T?
+
+    init {
+        // Pre-fetch so that isEnd is meaningful before the first tryAdvance call.
+        entry = poll()
+    }
+
+    /**
+     * Passes the pre-fetched element to [action] and fetches the next one.
+     *
+     * @return `true` when an element was handed to [action]; `false` when the transaction is
+     *         terminated or the end of the queue was already reached on entry
+     */
+    override fun tryAdvance(action: Consumer): Boolean {
+        if (transactionIsTerminated(terminationGuard)) return false
+        if (isEnd) return false
+        action.accept(entry)
+        entry = poll()
+        // An element was consumed, so report success as the Spliterator contract requires;
+        // exhaustion is reported by the isEnd check at the top of the next call.
+        return true
+    }
+
+    // Treats "terminated" and "not in a transaction" as the end of the stream; any other
+    // failure from the guard is propagated.
+    private fun transactionIsTerminated(terminationGuard: TerminationGuard): Boolean {
+        return try {
+            terminationGuard.check()
+            false
+        } catch (e: Exception) {
+            when (e) {
+                is TransactionTerminatedException, is NotInTransactionException -> true
+                else -> throw e
+            }
+        }
+    }
+
+    // Identity comparison: only the tombstone instance itself marks the end.
+    private val isEnd: Boolean
+        private get() = entry == null || entry === tombstone
+
+    // Waits up to `timeout` seconds for the next element; null means timeout or interruption.
+    private fun poll(): T? {
+        return try {
+            queue.poll(timeout, TimeUnit.SECONDS)
+        } catch (e: InterruptedException) {
+            // Restore the interrupt flag so callers further up can still observe the interruption.
+            Thread.currentThread().interrupt()
+            null
+        }
+    }
+
+    override fun trySplit(): Spliterator? {
+        return null
+    }
+
+    override fun estimateSize(): Long {
+        return Long.MAX_VALUE
+    }
+
+    override fun characteristics(): Int {
+        return Spliterator.NONNULL
+    }
+}
\ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/procedures/StreamsSinkProcedures.kt b/extended/src/main/kotlin/apoc/kafka/consumer/procedures/StreamsSinkProcedures.kt new file mode 100644 index 0000000000..aad44fafef --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/procedures/StreamsSinkProcedures.kt @@ -0,0 +1,213 @@ +package apoc.kafka.consumer.procedures + +import apoc.kafka.config.StreamsConfig +import apoc.kafka.consumer.StreamsEventConsumer +//import apoc.kafka.consumer.StreamsSinkConfiguration +import apoc.kafka.consumer.kafka.KafkaEventSink +import apoc.kafka.events.StreamsPluginStatus +import apoc.kafka.extensions.isDefaultDb +import apoc.kafka.utils.KafkaUtil +import apoc.kafka.utils.KafkaUtil.checkEnabled +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import kotlinx.coroutines.runBlocking +import org.apache.commons.lang3.exception.ExceptionUtils +import org.neo4j.graphdb.GraphDatabaseService +import org.neo4j.kernel.internal.GraphDatabaseAPI +import org.neo4j.logging.Log +import org.neo4j.procedure.Context +import org.neo4j.procedure.Description +import org.neo4j.procedure.Mode +import org.neo4j.procedure.Name +import org.neo4j.procedure.Procedure +import org.neo4j.procedure.TerminationGuard +import java.util.concurrent.ArrayBlockingQueue +import java.util.concurrent.ConcurrentHashMap +import java.util.stream.Collectors +import java.util.stream.Stream +import java.util.stream.StreamSupport + +class StreamResult(@JvmField val event: Map) +class KeyValueResult(@JvmField val name: String, @JvmField val value: Any?) + +class StreamsSinkProcedures { + + + @JvmField @Context + var log: Log? = null + + @JvmField @Context + var db: GraphDatabaseAPI?
= null + + @JvmField @Context + var terminationGuard: TerminationGuard? = null + + @Procedure(mode = Mode.READ, name = "apoc.kafka.consume") + @Description("apoc.kafka.consume(topic, {timeout: , from: , groupId: , commit: , partitions:[{partition: , offset: }]}) " + + "YIELD event - Allows to consume custom topics") + fun consume(@Name("topic") topic: String?, + @Name(value = "config", defaultValue = "{}") config: Map?): Stream = runBlocking { + checkEnabled() + if (topic.isNullOrEmpty()) { + log?.info("Topic empty, no message sent") + Stream.empty() + } else { + val properties = config?.mapValues { it.value.toString() } ?: emptyMap() +// val configuration = getStreamsEventSink(db!!)!! +// .getEventSinkConfigMapper() +// .convert(config = properties) + + val configuration = StreamsConfig.getConfiguration(properties) + readData(topic, config ?: emptyMap(), configuration) + } + } + +// @Procedure("apoc.kafka.sink.start") +// fun sinkStart(): Stream { +// checkEnabled() +// return checkLeader { +// try { +// getStreamsEventSink(db!!)?.start() +// sinkStatus() +// } catch (e: Exception) { +// log?.error("Cannot start the Sink because of the following exception", e) +// Stream.concat(sinkStatus(), +// Stream.of(KeyValueResult("exception", ExceptionUtils.getStackTrace(e)))) +// } +// } +// } +// +// @Procedure("apoc.kafka.sink.stop") +// fun sinkStop(): Stream { +// checkEnabled() +// return checkLeader { +// try { +// getStreamsEventSink(db!!)?.stop() +// sinkStatus() +// } catch (e: Exception) { +// log?.error("Cannot stopped the Sink because of the following exception", e) +// Stream.concat(sinkStatus(), +// Stream.of(KeyValueResult("exception", ExceptionUtils.getStackTrace(e)))) +// } +// } +// } +// +// @Procedure("apoc.kafka.sink.restart") +// fun sinkRestart(): Stream { +// val stopped = sinkStop().collect(Collectors.toList()) +// val hasError = stopped.any { it.name == "exception" } +// if (hasError) { +// return stopped.stream() +// } +// return sinkStart() +// 
} +// +// @Procedure("apoc.kafka.sink.config") +// @Deprecated("Please use apoc.kafka.configuration.get") +// fun sinkConfig(): Stream { +// checkEnabled() +// return checkLeader { +// StreamsSinkConfiguration +// // todo - check that +//// .from(configMap = StreamsConfig.getInstance(db!! as GraphDatabaseAPI) +// .from(configMap = StreamsConfig +// .getConfiguration().mapValues { it.value.toString() }, +// dbName = db!!.databaseName(), +// isDefaultDb = db!!.isDefaultDb()) +// .asMap() +// .entries.stream() +// .map { KeyValueResult(it.key, it.value) } +// } +// } +// +// @Procedure("apoc.kafka.sink.status") +// fun sinkStatus(): Stream { +// checkEnabled() +// return run { +// val value = (getStreamsEventSink(db!!)?.status() ?: StreamsPluginStatus.UNKNOWN).toString() +// Stream.of(KeyValueResult("status", value)) +// } +// } + + private fun checkLeader(lambda: () -> Stream): Stream = if (KafkaUtil.isWriteableInstance(db as GraphDatabaseAPI)) { + lambda() + } else { + Stream.of(KeyValueResult("error", "You can use this procedure only in the LEADER or in a single instance configuration.")) + } + + private fun readData(topic: String, procedureConfig: Map, consumerConfig: Map): Stream { + val cfg = procedureConfig.mapValues { if (it.key != "partitions") it.value else mapOf(topic to it.value) } + val timeout = cfg.getOrDefault("timeout", 1000).toString().toLong() + val data = ArrayBlockingQueue(1000) + val tombstone = StreamResult(emptyMap()) + GlobalScope.launch(Dispatchers.IO) { + val consumer = createConsumer(consumerConfig, topic) + consumer.start() + try { + val start = System.currentTimeMillis() + while ((System.currentTimeMillis() - start) < timeout) { + println("coroutineContext = ${coroutineContext}") + consumer.read(cfg) { _, topicData -> + println("topicData = ${topicData}") + data.addAll(topicData.mapNotNull { it.value }.map { StreamResult(mapOf("data" to it)) }) + } + } + println("coroutineContext = ${coroutineContext}") + data.add(tombstone) + } catch (e: 
Exception) { + println("coroutineContext = " + + e.message) + if (log?.isDebugEnabled!!) { + log?.error("Error while consuming data", e) + } + } finally { + consumer.stop() + } + } + if (log?.isDebugEnabled!!) { + log?.debug("Data retrieved from topic $topic after $timeout milliseconds: $data") + } + + return StreamSupport.stream(QueueBasedSpliterator(data, tombstone, terminationGuard!!, timeout), false) + } + + private fun createConsumer(consumerConfig: Map, topic: String): StreamsEventConsumer = runBlocking { + // todo - check that + val copy = StreamsConfig.getConfiguration() +// val copy = StreamsConfig.getInstance(db!! as GraphDatabaseAPI).getConfiguration() + .filter { it.value is String } + .mapValues { it.value.toString() } + .toMutableMap() + copy.putAll(consumerConfig) + getStreamsEventSink(db!!)!!.getEventConsumerFactory() + .createStreamsEventConsumer(copy, log!!, setOf(topic)) + } + + companion object { + // todo - move in another class, similar to CypherProceduresHandler extends LifecycleAdapter implements AvailabilityListener { +// fun initListeners(db: GraphDatabaseAPI?, log: Log?) { +// // todo - move in another class, similar to CypherProcedureHandler +// // todo - check if there is a better way, maybe put if(apoc.kafka.enabled=true) +// StreamsRouterConfigurationListener(db!!, log!! +// ).start(StreamsConfig.getConfiguration()) +// +// StreamsSinkConfigurationListener(db!!, log!! 
+// ).start(StreamsConfig.getConfiguration()) +// } +// + private val streamsEventSinkStore = ConcurrentHashMap() + + private fun getStreamsEventSink(db: GraphDatabaseService) = streamsEventSinkStore[KafkaUtil.getName(db)] + + fun registerStreamsEventSink(db: GraphDatabaseAPI, streamsEventSink: KafkaEventSink) { + streamsEventSinkStore[KafkaUtil.getName(db)] = streamsEventSink + } + + fun unregisterStreamsEventSink(db: GraphDatabaseAPI) = streamsEventSinkStore.remove(KafkaUtil.getName(db)) + + fun hasStatus(db: GraphDatabaseAPI, status: StreamsPluginStatus) = getStreamsEventSink(db)?.status() == status + + fun isRegistered(db: GraphDatabaseAPI) = getStreamsEventSink(db) != null + } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/consumer/utils/ConsumerUtils.kt b/extended/src/main/kotlin/apoc/kafka/consumer/utils/ConsumerUtils.kt new file mode 100644 index 0000000000..d67bddcfb4 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/consumer/utils/ConsumerUtils.kt @@ -0,0 +1,13 @@ +package apoc.kafka.consumer.utils + +import org.neo4j.kernel.internal.GraphDatabaseAPI +import apoc.kafka.utils.KafkaUtil + +object ConsumerUtils { + + fun isWriteableInstance(db: GraphDatabaseAPI): Boolean = KafkaUtil.isWriteableInstance(db) + + fun executeInWriteableInstance(db: GraphDatabaseAPI, + action: () -> T?): T? = KafkaUtil.executeInWriteableInstance(db, action) + +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/events/ProcedureResults.kt b/extended/src/main/kotlin/apoc/kafka/events/ProcedureResults.kt new file mode 100644 index 0000000000..11e0c76b89 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/events/ProcedureResults.kt @@ -0,0 +1,4 @@ +package apoc.kafka.events + +class StreamResult(@JvmField val event: Map) +class KeyValueResult(@JvmField val name: String, @JvmField val value: Any?) 
\ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/events/StreamsEvent.kt b/extended/src/main/kotlin/apoc/kafka/events/StreamsEvent.kt new file mode 100644 index 0000000000..b7573a9eb6 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/events/StreamsEvent.kt @@ -0,0 +1,71 @@ +package apoc.kafka.events + +import org.neo4j.graphdb.schema.ConstraintType + +/** Operation recorded in [Meta.operation]. */ enum class OperationType { created, updated, deleted } + +/** Metadata attached to a transaction event; [source] defaults to an empty map. */ data class Meta(val timestamp: Long, + val username: String, + val txId: Long, + val txEventId: Int, + val txEventsCount: Int, + val operation: OperationType, + val source: Map = emptyMap()) + + +/** Entity kind carried by [Payload.type]. */ enum class EntityType { node, relationship } + +/** Reference to one end of a relationship, used by [RelationshipPayload.start] and [RelationshipPayload.end]. */ data class RelationshipNodeChange(val id: String, + val labels: List?, + val ids: Map) + +/** Base type for the before/after state of an entity; exposes its nullable property map. */ abstract class RecordChange{ abstract val properties: Map? } +/** Node state: properties plus labels. */ data class NodeChange(override val properties: Map?, + val labels: List?): RecordChange() + +/** Relationship state: properties only. */ data class RelationshipChange(override val properties: Map?): RecordChange() + +/** Common payload contract: entity id, entity type and nullable before/after states. */ abstract class Payload { + abstract val id: String + abstract val type: EntityType + abstract val before: RecordChange? + abstract val after: RecordChange? 
+} +/** [Payload] for a node; [type] defaults to [EntityType.node]. */ data class NodePayload(override val id: String, + override val before: NodeChange?, + override val after: NodeChange?, + override val type: EntityType = EntityType.node): Payload() + +/** [Payload] for a relationship with its start/end node references and label; [type] defaults to [EntityType.relationship]. */ data class RelationshipPayload(override val id: String, + val start: RelationshipNodeChange, + val end: RelationshipNodeChange, + override val before: RelationshipChange?, + override val after: RelationshipChange?, + val label: String, + override val type: EntityType = EntityType.relationship): Payload() + +/** Constraint categories used by [Constraint.type]. */ enum class StreamsConstraintType { UNIQUE, NODE_PROPERTY_EXISTS, RELATIONSHIP_PROPERTY_EXISTS } + +/** Key strategy options for relationships; semantics are defined by the code that consumes them. */ enum class RelKeyStrategy { DEFAULT, ALL } + +/** A constraint on [label] over [properties], classified by [type]. */ data class Constraint(val label: String?, + val properties: Set, + val type: StreamsConstraintType) + +/** Schema attached to an event: a property map and a list of constraints, both empty by default. */ data class Schema(val properties: Map = emptyMap(), + val constraints: List = emptyList()) + +/** Base event wrapping an arbitrary [payload]. */ open class StreamsEvent(open val payload: Any) +/** Event carrying [Meta], a typed [Payload] and a [Schema]. */ data class StreamsTransactionEvent(val meta: Meta, override val payload: Payload, val schema: Schema): StreamsEvent(payload) + +/** Node-specific event shape; not a [StreamsEvent] subtype itself. */ data class StreamsTransactionNodeEvent(val meta: Meta, + val payload: NodePayload, + val schema: Schema) { + /** Builds a [StreamsTransactionEvent] from this event's meta, payload and schema. */ fun toStreamsTransactionEvent() = StreamsTransactionEvent(this.meta, this.payload, this.schema) +} +/** Relationship-specific event shape; not a [StreamsEvent] subtype itself. */ data class StreamsTransactionRelationshipEvent(val meta: Meta, + val payload: RelationshipPayload, + val schema: Schema) { + /** Builds a [StreamsTransactionEvent] from this event's meta, payload and schema. */ fun toStreamsTransactionEvent() = StreamsTransactionEvent(this.meta, this.payload, this.schema) +} + diff --git a/extended/src/main/kotlin/apoc/kafka/events/StreamsPluginStatus.kt b/extended/src/main/kotlin/apoc/kafka/events/StreamsPluginStatus.kt new file mode 100644 index 0000000000..4eb3997889 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/events/StreamsPluginStatus.kt @@ -0,0 +1,3 @@ +package apoc.kafka.events + +/** Status values for the plugin; UNKNOWN is available as a fallback value. */ enum class StreamsPluginStatus { RUNNING, STOPPED, UNKNOWN } \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/extensions/CommonExtensions.kt 
b/extended/src/main/kotlin/apoc/kafka/extensions/CommonExtensions.kt
new file mode 100644
index 0000000000..4c975816da
--- /dev/null
+++ b/extended/src/main/kotlin/apoc/kafka/extensions/CommonExtensions.kt
@@ -0,0 +1,81 @@
+package apoc.kafka.extensions
+
+import org.apache.avro.Schema
+import org.apache.avro.generic.GenericEnumSymbol
+import org.apache.avro.generic.GenericFixed
+import org.apache.avro.generic.GenericRecord
+import org.apache.avro.generic.IndexedRecord
+import org.apache.kafka.clients.consumer.ConsumerRecord
+import org.apache.kafka.clients.consumer.OffsetAndMetadata
+import org.apache.kafka.common.TopicPartition
+import org.neo4j.graphdb.Node
+import apoc.kafka.utils.JSONUtils
+import apoc.kafka.service.StreamsSinkEntity
+import java.nio.ByteBuffer
+import java.util.*
+import javax.lang.model.SourceVersion
+
+fun Map.getInt(name:String, defaultValue: Int) = this.get(name)?.toInt() ?: defaultValue // falls back to defaultValue when the key is absent
+fun Map<*, *>.asProperties() = this.let { // copies every entry into a fresh java.util.Properties
+    val properties = Properties()
+    properties.putAll(it)
+    properties
+}
+
+fun Node.labelNames() : List {
+    return this.labels.map { it.name() }
+}
+
+fun String.toPointCase(): String { // camelCase -> dot.separated.lowercase; Locale.ROOT keeps the result independent of the JVM default locale
+    return this.split("(?<=[a-z])(?=[A-Z])".toRegex()).joinToString(separator = ".").toLowerCase(Locale.ROOT)
+}
+
+fun String.quote(): String = if (SourceVersion.isIdentifier(this)) this else "`$this`" // backtick-quotes anything that is not a plain identifier
+
+fun Map.flatten(map: Map = this, prefix: String = ""): Map { // nested maps become single-level entries with dot-joined keys
+    return map.flatMap {
+        val key = it.key
+        val value = it.value
+        val newKey = if (prefix != "") "$prefix.$key" else key
+        if (value is Map<*, *>) {
+            flatten(value as Map, newKey).toList()
+        } else {
+            listOf(newKey to value)
+        }
+    }.toMap()
+}
+
+fun ConsumerRecord<*, *>.topicPartition() = TopicPartition(this.topic(), this.partition())
+fun ConsumerRecord<*, *>.offsetAndMetadata(metadata: String = "") = OffsetAndMetadata(this.offset() + 1, metadata) // offset + 1: the next offset to read
+
+private fun convertAvroData(rawValue: Any?): Any? = when (rawValue) { // recursively turns Avro containers into plain maps, lists, strings and byte arrays
+    is IndexedRecord -> rawValue.toMap()
+    is Collection<*> -> rawValue.map(::convertAvroData)
+    is Array<*> -> if (rawValue.javaClass.componentType.isPrimitive) rawValue else rawValue.map(::convertAvroData)
+    is Map<*, *> -> rawValue
+        .mapKeys { it.key.toString() }
+        .mapValues { convertAvroData(it.value) }
+    is GenericFixed -> rawValue.bytes()
+    is ByteBuffer -> rawValue.duplicate().let { view -> ByteArray(view.remaining()).also { view.get(it) } } // copy only position..limit; array() ignores both and fails on read-only/direct buffers
+    is GenericEnumSymbol<*>, is CharSequence -> rawValue.toString()
+    else -> rawValue
+}
+fun IndexedRecord.toMap() = this.schema.fields
+    .map { it.name() to convertAvroData(this[it.pos()]) }
+    .toMap()
+
+fun Schema.toMap() = JSONUtils.asMap(this.toString())
+
+private fun convertData(data: Any?, stringWhenFailure: Boolean = false): Any? { // unsupported types become strings when stringWhenFailure is set, otherwise throw
+    return when (data) {
+        null -> null
+        is ByteArray -> JSONUtils.readValue(data, Any::class.java)
+        is GenericRecord -> data.toMap()
+        else -> if (stringWhenFailure) data.toString() else throw RuntimeException("Unsupported type ${data::class.java.name}")
+    }
+}
+fun ConsumerRecord<*, *>.toStreamsSinkEntity(): StreamsSinkEntity {
+    val key = convertData(this.key(), true)
+    val value = convertData(this.value())
+    return StreamsSinkEntity(key, value)
+}
\ No newline at end of file
diff --git a/extended/src/main/kotlin/apoc/kafka/extensions/CoroutineExtensions.kt b/extended/src/main/kotlin/apoc/kafka/extensions/CoroutineExtensions.kt
new file mode 100644
index 0000000000..e4455533f1
--- /dev/null
+++ b/extended/src/main/kotlin/apoc/kafka/extensions/CoroutineExtensions.kt
@@ -0,0 +1,44 @@
+package apoc.kafka.extensions
+
+import kotlinx.coroutines.Deferred
+import kotlinx.coroutines.ExperimentalCoroutinesApi
+import kotlinx.coroutines.ObsoleteCoroutinesApi
+import kotlinx.coroutines.channels.ticker
+import kotlinx.coroutines.selects.whileSelect
+import java.util.concurrent.CopyOnWriteArraySet
+import java.util.concurrent.TimeoutException
+
+
+// taken from
https://stackoverflow.com/questions/52192752/kotlin-how-to-run-n-coroutines-and-wait-for-first-m-results-or-timeout
+@ObsoleteCoroutinesApi
+@ExperimentalCoroutinesApi
+suspend fun <T> List<Deferred<T>>.awaitAll(timeoutMs: Long): List<T> {
+    val jobs = CopyOnWriteArraySet<Deferred<T>>(this)
+    val result = ArrayList<T>(size)
+    val timeout = ticker(timeoutMs)
+    try { // the ticker keeps producing until it is cancelled, so release it on every exit path
+        whileSelect {
+            jobs.forEach { deferred ->
+                deferred.onAwait {
+                    jobs.remove(deferred)
+                    result.add(it)
+                    result.size != size
+                }
+            }
+
+            timeout.onReceive {
+                jobs.forEach { it.cancel() }
+                throw TimeoutException("Tasks $size cancelled after timeout of $timeoutMs ms.")
+            }
+        }
+    } finally { timeout.cancel() }
+    return result
+}
+
+@ExperimentalCoroutinesApi
+fun <T> Deferred<T>.errors() = when { // completion exception if finished, a RuntimeException if still active, else null
+    isCompleted -> getCompletionExceptionOrNull()
+    isCancelled -> getCompletionExceptionOrNull() // was getCancellationException()
+    isActive -> RuntimeException("Job $this still active")
+    else -> null
+}
\ No newline at end of file
diff --git a/extended/src/main/kotlin/apoc/kafka/extensions/DatabaseManagementServiceExtensions.kt b/extended/src/main/kotlin/apoc/kafka/extensions/DatabaseManagementServiceExtensions.kt
new file mode 100644
index 0000000000..08d7ed2688
--- /dev/null
+++ b/extended/src/main/kotlin/apoc/kafka/extensions/DatabaseManagementServiceExtensions.kt
@@ -0,0 +1,28 @@
+package apoc.kafka.extensions
+
+import apoc.kafka.utils.KafkaUtil
+import org.neo4j.dbms.api.DatabaseManagementService
+import org.neo4j.kernel.internal.GraphDatabaseAPI
+
+fun DatabaseManagementService.getSystemDb() = this.database(KafkaUtil.SYSTEM_DATABASE_NAME) as GraphDatabaseAPI
+
+fun DatabaseManagementService.getDefaultDbName() = getSystemDb().let { // null when the query yields no row or fails
+    try {
+        it.beginTx().use {
+            val col = it.execute("SHOW DEFAULT DATABASE").columnAs("name")
+            if (col.hasNext()) {
+                col.next()
+            } else {
+                null
+            }
+        }
+    } catch (e: Exception) {
+        null
+    }
+}
+
+fun DatabaseManagementService.getDefaultDb() = getDefaultDbName()?.let { this.database(it) as GraphDatabaseAPI }
+
+fun DatabaseManagementService.isAvailable(timeout: Long) = this.listDatabases() // true only if every listed database is available
+    .all { this.database(it).isAvailable(timeout) }
+
diff --git a/extended/src/main/kotlin/apoc/kafka/extensions/GraphDatabaseServerExtensions.kt b/extended/src/main/kotlin/apoc/kafka/extensions/GraphDatabaseServerExtensions.kt
new file mode 100644
index 0000000000..62aec6c725
--- /dev/null
+++ b/extended/src/main/kotlin/apoc/kafka/extensions/GraphDatabaseServerExtensions.kt
@@ -0,0 +1,32 @@
+package apoc.kafka.extensions
+
+import apoc.kafka.utils.KafkaUtil
+import org.neo4j.common.DependencyResolver
+import org.neo4j.dbms.api.DatabaseManagementService
+import org.neo4j.graphdb.GraphDatabaseService
+import org.neo4j.graphdb.Result
+import org.neo4j.graphdb.event.TransactionEventListener
+import org.neo4j.kernel.internal.GraphDatabaseAPI
+
+fun GraphDatabaseService.execute(cypher: String) = this.execute(cypher, emptyMap())
+fun GraphDatabaseService.execute(cypher: String, params: Map) = this.executeTransactionally(cypher, params)
+
+fun <T> GraphDatabaseService.execute(cypher: String, lambda: ((Result) -> T)) = this.execute(cypher, emptyMap(), lambda)
+fun <T> GraphDatabaseService.execute(cypher: String,
+                                     params: Map,
+                                     lambda: ((Result) -> T)) = this.executeTransactionally(cypher, params, lambda)
+
+fun GraphDatabaseService.isSystemDb() = this.databaseName() == KafkaUtil.SYSTEM_DATABASE_NAME
+
+fun GraphDatabaseService.databaseManagementService() = (this as GraphDatabaseAPI).dependencyResolver
+    .resolveDependency(DatabaseManagementService::class.java, DependencyResolver.SelectionStrategy.SINGLE)
+
+fun GraphDatabaseService.isDefaultDb() = databaseManagementService().getDefaultDbName() == databaseName()
+
+fun GraphDatabaseService.registerTransactionEventListener(txHandler: TransactionEventListener<*>) {
+    databaseManagementService().registerTransactionEventListener(this.databaseName(), txHandler)
+}
+
+fun GraphDatabaseService.unregisterTransactionEventListener(txHandler: TransactionEventListener<*>) {
+    databaseManagementService().unregisterTransactionEventListener(this.databaseName(), txHandler)
+}
\ No newline at end of file
diff --git a/extended/src/main/kotlin/apoc/kafka/producer/Extensions.kt b/extended/src/main/kotlin/apoc/kafka/producer/Extensions.kt
new file mode 100644
index 0000000000..6286bc39f3
--- /dev/null
+++ b/extended/src/main/kotlin/apoc/kafka/producer/Extensions.kt
@@ -0,0 +1,82 @@
+package apoc.kafka.producer
+
+import apoc.kafka.events.EntityType
+import apoc.kafka.events.NodeChange
+import apoc.kafka.events.NodePayload
+import apoc.kafka.events.OperationType
+import apoc.kafka.events.RelationshipNodeChange
+import apoc.kafka.events.RelationshipPayload
+import apoc.kafka.events.Schema
+import apoc.kafka.events.StreamsConstraintType
+import apoc.kafka.events.StreamsTransactionEvent
+import apoc.kafka.extensions.labelNames
+import apoc.kafka.utils.KafkaUtil.getNodeKeys
+import org.apache.kafka.clients.producer.RecordMetadata
+import org.apache.kafka.common.config.TopicConfig
+import org.neo4j.graphdb.Node
+import org.neo4j.graphdb.Relationship
+import org.neo4j.graphdb.schema.ConstraintDefinition
+import org.neo4j.graphdb.schema.ConstraintType
+
+fun Node.toMap(): Map {
+    return mapOf("id" to id.toString(), "properties" to allProperties, "labels" to labelNames(), "type" to EntityType.node)
+}
+
+fun Relationship.toMap(): Map {
+    return mapOf("id" to id.toString(), "properties" to allProperties, "label" to type.name(),
+        "start" to startNode.toMap(),
+        "end" to endNode.toMap(),
+        "type" to EntityType.relationship)
+}
+
+fun RecordMetadata.toMap(): Map = mapOf(
+    "offset" to offset(),
+    "timestamp" to timestamp(),
+    "keySize" to serializedKeySize(),
+    "valueSize" to serializedValueSize(),
+    "partition" to partition()
+)
+
+fun ConstraintDefinition.streamsConstraintType(): StreamsConstraintType {
+    return when (this.constraintType) {
+        ConstraintType.UNIQUENESS, ConstraintType.NODE_KEY -> StreamsConstraintType.UNIQUE
+        else -> if (isNodeConstraint()) StreamsConstraintType.NODE_PROPERTY_EXISTS else StreamsConstraintType.RELATIONSHIP_PROPERTY_EXISTS
+    }
+}
+
+fun ConstraintDefinition.isNodeConstraint(): Boolean { // probes `label`; an IllegalStateException is treated as "not a node constraint"
+    return try { this.label; true } catch (e: IllegalStateException) { false }
+}
+
+fun ConstraintDefinition.isRelationshipConstraint(): Boolean { // probes `relationshipType` the same way
+    return try { this.relationshipType; true } catch (e: IllegalStateException) { false }
+}
+
+fun StreamsTransactionEvent.asSourceRecordValue(strategy: String): StreamsTransactionEvent? = // null value for deletes under the compact strategy
+    if(isStrategyCompact(strategy) && meta.operation == OperationType.deleted) null else this
+
+fun StreamsTransactionEvent.asSourceRecordKey(strategy: String): Any =
+    when {
+        isStrategyCompact(strategy) && payload is NodePayload -> nodePayloadAsMessageKey(payload as NodePayload, schema)
+        isStrategyCompact(strategy) && payload is RelationshipPayload -> relationshipAsMessageKey(payload as RelationshipPayload)
+        else -> "${meta.txId + meta.txEventId}-${meta.txEventId}"
+    }
+
+private fun nodePayloadAsMessageKey(payload: NodePayload, schema: Schema) = run {
+    val nodeChange: NodeChange = payload.after ?: payload.before!!
+    val labels = nodeChange.labels ?: emptyList()
+    val props: Map = nodeChange.properties ?: emptyMap()
+    val keys = getNodeKeys(labels, props.keys, schema.constraints)
+    val ids = props.filterKeys { keys.contains(it) }
+
+    if (ids.isEmpty()) payload.id else mapOf("ids" to ids, "labels" to labels) // fall back to the payload id when no key properties are present
+}
+
+private fun RelationshipNodeChange.toKey(): Any = if (ids.isEmpty()) id else mapOf("ids" to ids, "labels" to labels)
+
+private fun relationshipAsMessageKey(payload: RelationshipPayload) = mapOf(
+    "start" to payload.start.toKey(),
+    "end" to payload.end.toKey(),
+    "label" to payload.label)
+
+private fun isStrategyCompact(strategy: String) = strategy == TopicConfig.CLEANUP_POLICY_COMPACT
\ No newline at end of file
diff --git a/extended/src/main/kotlin/apoc/kafka/producer/RoutingConfiguration.kt b/extended/src/main/kotlin/apoc/kafka/producer/RoutingConfiguration.kt
new file mode 100644
index 0000000000..54465a75ec
--- /dev/null
+++ b/extended/src/main/kotlin/apoc/kafka/producer/RoutingConfiguration.kt
@@ -0,0 +1,251 @@
+package apoc.kafka.producer
+
+import org.apache.commons.lang3.StringUtils
+import org.apache.kafka.common.internals.Topic
+import org.neo4j.graphdb.Entity
+import org.neo4j.graphdb.Node
+import org.neo4j.graphdb.Relationship
+import org.neo4j.logging.Log
+import apoc.kafka.events.*
+
+
+private val PATTERN_REG: Regex = "^(\\s*\\:*\\s*\\`*\\s*\\w+\\s*(?:\\:*\\s*\\`*\\s*\\:?(?:[\\w\\`|\\*]+)\\s*)*\\`*\\:?)\\s*(?:\\{\\s*(-?[\\w|\\*]+\\s*(?:,\\s*-?[\\w|\\*]+\\s*)*)\\})?\$".toRegex()
+private val PATTERN_COLON_REG = "\\s*:\\s*(?=(?:[^\\`]*\\`[^\\`]*\\`)*[^\\`]*\$)".toRegex()
+private val PATTERN_COMMA = "\\s*,\\s*".toRegex()
+private const val PATTERN_WILDCARD = "*"
+private const val PATTERN_PROP_MINUS = '-'
+private const val PATTERN_SPLIT = ";"
+private const val BACKTICK_CHAR = "`"
+
+data class RoutingProperties(val all: Boolean,
+                             val include: List,
+                             val exclude: List) {
+    companion object {
+        fun from(matcher: MatchResult): RoutingProperties { // group 2 of PATTERN_REG is the comma-separated property list
+            val props = matcher.groupValues[2].trim().let { if (it.isEmpty()) emptyList() else it.trim().split(
+                PATTERN_COMMA
+            ) }
+            val include = if (props.isEmpty()) {
+                emptyList()
+            } else {
+                props.filter { it != PATTERN_WILDCARD && !it.startsWith(PATTERN_PROP_MINUS) }
+            }
+            val exclude = if (props.isEmpty()) {
+                emptyList()
+            } else {
+                props.filter { it != PATTERN_WILDCARD && it.startsWith(PATTERN_PROP_MINUS) }.map { it.substring(1) }
+            }
+            val all = props.isEmpty() || props.contains(PATTERN_WILDCARD)
+            return RoutingProperties(all = all, include = include, exclude = exclude)
+        }
+    }
+}
+
+abstract class RoutingConfiguration {
+    abstract val topic: String
+    abstract val all: Boolean
+    abstract val include: List
+    abstract val exclude: List
+    abstract fun filter(entity: Entity): Map
+}
+
+private fun hasLabel(label: String, streamsTransactionEvent: StreamsTransactionEvent): Boolean {
+    if (streamsTransactionEvent.payload.type == EntityType.relationship) {
+        return false
+    }
+    val payload = when(streamsTransactionEvent.meta.operation) { // deleted nodes only have a "before" state
+        OperationType.deleted -> streamsTransactionEvent.payload.before as NodeChange
+        else -> streamsTransactionEvent.payload.after as NodeChange
+    }
+    return payload.labels.orEmpty().contains(label)
+}
+
+private fun isRelationshipType(name: String, streamsTransactionEvent: StreamsTransactionEvent): Boolean {
+    if (streamsTransactionEvent.payload.type == EntityType.node) {
+        return false
+    }
+    val relationshipChange = streamsTransactionEvent.payload as RelationshipPayload
+    return relationshipChange.label == name
+}
+
+private fun filterProperties(properties: Map?, routingConfiguration: RoutingConfiguration): Map? { // include list wins over exclude list; "all" returns the input untouched
+    if (properties == null) {
+        return null
+    }
+    if (!routingConfiguration.all) {
+        if (routingConfiguration.include.isNotEmpty()) {
+            return properties.filter { prop -> routingConfiguration.include.contains(prop.key) }
+        }
+        if (routingConfiguration.exclude.isNotEmpty()) {
+            return properties.filter { prop -> !routingConfiguration.exclude.contains(prop.key) }
+        }
+
+    }
+    return properties
+}
+
+data class NodeRoutingConfiguration(val labels: List = emptyList(),
+                                    override val topic: String = "neo4j",
+                                    override val all: Boolean = true,
+                                    override val include: List = emptyList(),
+                                    override val exclude: List = emptyList()): RoutingConfiguration() {
+
+    override fun filter(node: Entity): Map {
+        if (node !is Node) {
+            throw IllegalArgumentException("argument must be and instance of ${Node::class.java.name}")
+        }
+        val properties = filterProperties(node.allProperties, this)
+        val map = node.toMap().toMutableMap()
+        map["properties"] = properties
+        return map
+    }
+
+    companion object {
+        fun parse(topic: String, pattern: String): List { // "*" routes everything; otherwise one configuration per ";"-separated pattern
+            Topic.validate(topic)
+            if (pattern == PATTERN_WILDCARD) {
+                return listOf(NodeRoutingConfiguration(topic = topic))
+            }
+            return pattern.split(PATTERN_SPLIT).map {
+                val matcher = PATTERN_REG.matchEntire(it)
+                if (matcher == null) {
+                    throw IllegalArgumentException("The pattern $pattern for topic $topic is invalid")
+                } else {
+                    val labels = matcher.groupValues[1].trim().split(PATTERN_COLON_REG).map { it.replace(BACKTICK_CHAR, StringUtils.EMPTY) }.filter{ it.isNotBlank() }
+                    val properties = RoutingProperties.from(matcher)
+                    NodeRoutingConfiguration(labels = labels, topic = topic, all = properties.all,
+                        include = properties.include, exclude = properties.exclude)
+                }
+            }
+        }
+
+        fun prepareEvent(streamsTransactionEvent: StreamsTransactionEvent, routingConf: List): Map { // topic -> event with properties filtered per matching configuration
+            return routingConf
+                .filter {
+                    it.labels.isEmpty() || it.labels.any { hasLabel(it, streamsTransactionEvent) }
+                }
+                .map {
+                    val nodePayload = streamsTransactionEvent.payload as NodePayload
+                    val newRecordBefore = if (nodePayload.before != null) {
+                        val recordBefore = nodePayload.before as NodeChange
+                        recordBefore.copy(properties = filterProperties(streamsTransactionEvent.payload.before?.properties, it),
+                            labels = recordBefore.labels)
+                    } else {
+                        null
+                    }
+                    val newRecordAfter = if (nodePayload.after != null) {
+                        val recordAfter = nodePayload.after as NodeChange
+                        recordAfter.copy(properties = filterProperties(streamsTransactionEvent.payload.after?.properties, it),
+                            labels = recordAfter.labels)
+                    } else {
+                        null
+                    }
+
+                    val newNodePayload = nodePayload.copy(id = nodePayload.id,
+                        before = newRecordBefore,
+                        after = newRecordAfter)
+
+                    val newStreamsEvent = streamsTransactionEvent.copy(schema = streamsTransactionEvent.schema,
+                        meta = streamsTransactionEvent.meta,
+                        payload = newNodePayload)
+
+                    it.topic to newStreamsEvent
+                }
+                .associateBy({ it.first }, { it.second })
+        }
+    }
+}
+
+data class RelationshipRoutingConfiguration(val name: String = "",
+                                            val relKeyStrategy: RelKeyStrategy = RelKeyStrategy.DEFAULT,
+                                            override val topic: String = "neo4j",
+                                            override val all: Boolean = true,
+                                            override val include: List = emptyList(),
+                                            override val exclude: List = emptyList()): RoutingConfiguration() {
+
+    override fun filter(relationship: Entity): Map {
+        if (relationship !is Relationship) {
+            throw IllegalArgumentException("argument must be and instance of ${Relationship::class.java.name}")
+        }
+        val properties = filterProperties(relationship.allProperties, this)
+        val map = relationship.toMap().toMutableMap()
+        map["properties"] = properties
+        return map
+    }
+
+    companion object {
+        fun parse(topic: String, pattern: String, keyStrategyString: String = RelKeyStrategy.DEFAULT.toString(), log: Log? = null): List {
+            Topic.validate(topic)
+            if (pattern == PATTERN_WILDCARD) {
+                return listOf(RelationshipRoutingConfiguration(topic = topic))
+            }
+            return pattern.split(PATTERN_SPLIT).map {
+                val matcher = PATTERN_REG.matchEntire(it)
+                if (matcher == null) {
+                    throw IllegalArgumentException("The pattern $pattern for topic $topic is invalid")
+                } else {
+                    val labels = matcher.groupValues[1].split(PATTERN_COLON_REG)
+                    if (labels.size > 1) {
+                        throw IllegalArgumentException("The pattern $pattern for topic $topic is invalid")
+                    }
+                    val properties = RoutingProperties.from(matcher)
+
+                    val relKeyStrategy = try { // Locale.ROOT: enum lookup must not depend on the JVM default locale
+                        RelKeyStrategy.valueOf(keyStrategyString.toUpperCase(java.util.Locale.ROOT))
+                    } catch (e: IllegalArgumentException) {
+                        log?.warn("Invalid key strategy setting, switching to default value ${RelKeyStrategy.DEFAULT.toString().toLowerCase(java.util.Locale.ROOT)}")
+                        RelKeyStrategy.DEFAULT
+                    }
+
+                    RelationshipRoutingConfiguration(name = labels.first().trim().replace(BACKTICK_CHAR, StringUtils.EMPTY),
+                        topic = topic, all = properties.all,
+                        include = properties.include, exclude = properties.exclude, relKeyStrategy = relKeyStrategy)
+                }
+            }
+        }
+
+        fun prepareEvent(streamsTransactionEvent: StreamsTransactionEvent, routingConf: List): Map { // topic -> event with properties filtered per matching configuration
+            return routingConf
+                .filter {
+                    it.name.isBlank() || isRelationshipType(it.name, streamsTransactionEvent)
+                }
+                .map {
+                    val relationshipPayload = streamsTransactionEvent.payload as RelationshipPayload
+
+                    val newRecordBefore = if (relationshipPayload.before != null) {
+                        val recordBefore = relationshipPayload.before as RelationshipChange
+                        recordBefore.copy(properties = filterProperties(streamsTransactionEvent.payload.before?.properties, it))
+                    } else {
+                        null
+                    }
+                    val newRecordAfter = if (relationshipPayload.after != null) {
+                        val recordAfter = relationshipPayload.after as RelationshipChange
+                        recordAfter.copy(properties = filterProperties(streamsTransactionEvent.payload.after?.properties, it))
+                    } else {
+                        null
+                    }
+
+                    val newRelationshipPayload = relationshipPayload.copy(id = relationshipPayload.id,
+                        before = newRecordBefore,
+                        after = newRecordAfter,
+                        label = relationshipPayload.label)
+
+                    val newStreamsEvent = streamsTransactionEvent.copy(schema = streamsTransactionEvent.schema,
+                        meta = streamsTransactionEvent.meta,
+                        payload = newRelationshipPayload)
+
+                    it.topic to newStreamsEvent
+                }
+                .associateBy({ it.first }, { it.second })
+        }
+    }
+}
+
+object RoutingConfigurationFactory {
+    fun getRoutingConfiguration(topic: String, line: String, entityType: EntityType, keyStrategy: String = RelKeyStrategy.DEFAULT.toString(), log: Log? = null): List {
+        return when (entityType) {
+            EntityType.node -> NodeRoutingConfiguration.parse(topic, line)
+            EntityType.relationship -> RelationshipRoutingConfiguration.parse(topic, line, keyStrategy, log)
+        }
+    }
+}
+
diff --git a/extended/src/main/kotlin/apoc/kafka/producer/StreamsConstraintsService.kt b/extended/src/main/kotlin/apoc/kafka/producer/StreamsConstraintsService.kt
new file mode 100644
index 0000000000..7126ddfaa0
--- /dev/null
+++ b/extended/src/main/kotlin/apoc/kafka/producer/StreamsConstraintsService.kt
@@ -0,0 +1,82 @@
+package apoc.kafka.producer
+
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.GlobalScope
+import kotlinx.coroutines.Job
+import kotlinx.coroutines.cancelAndJoin
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.isActive
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.runBlocking
+import org.neo4j.graphdb.DatabaseShutdownException
+import org.neo4j.graphdb.GraphDatabaseService
+import org.neo4j.graphdb.Label
+import org.neo4j.graphdb.RelationshipType
+import org.neo4j.graphdb.TransactionFailureException
+import apoc.kafka.events.Constraint
+import apoc.kafka.utils.KafkaUtil
+import java.io.Closeable
+import java.util.Collections
+import java.util.concurrent.ConcurrentHashMap
+
+class StreamsConstraintsService(private val db: GraphDatabaseService, private val poolInterval: Long): Closeable { // caches schema constraints, refreshed every poolInterval ms
+
+    private val nodeConstraints = ConcurrentHashMap>()
+    private val relConstraints = ConcurrentHashMap>()
+
+    private lateinit var job: Job
+
+    override fun close() { // tolerates close() before start(): the lateinit access error is ignored
+        KafkaUtil.ignoreExceptions({ runBlocking { job.cancelAndJoin() } }, UninitializedPropertyAccessException::class.java)
+    }
+
+    fun start() {
+        job = GlobalScope.launch(Dispatchers.IO) {
+            while (isActive) {
+                if (!db.isAvailable(5000)) return@launch
+                KafkaUtil.ignoreExceptions({
+                    db.beginTx().use {
+                        val constraints = it.schema().constraints
+                        val byLabel = constraints
+                            .filter { it.isNodeConstraint() }
+                            .groupBy { it.label.name() }
+                        nodeConstraints.keys.retainAll(byLabel.keys) // forget labels whose constraints were dropped
+                        byLabel.forEach { label, constraints ->
+                                nodeConstraints[label] = constraints
+                                    .map { Constraint(label, it.propertyKeys.toSet(), it.streamsConstraintType()) }
+                                    .toSet()
+                            }
+                        val byType = constraints
+                            .filter { it.isRelationshipConstraint() }
+                            .groupBy { it.relationshipType.name() }
+                        relConstraints.keys.retainAll(byType.keys) // same eviction for relationship types
+                        byType.forEach { relationshipType, constraints ->
+                                relConstraints[relationshipType] = constraints
+                                    .map { Constraint(relationshipType, it.propertyKeys.toSet(), it.streamsConstraintType()) }
+                                    .toSet()
+                            }
+                    }
+                }, DatabaseShutdownException::class.java, TransactionFailureException::class.java, IllegalStateException::class.java)
+                delay(poolInterval)
+            }
+        }
+    }
+
+    fun forLabel(label: Label): Set {
+        return nodeConstraints[label.name()] ?: emptySet()
+    }
+
+    fun forRelationshipType(relationshipType: RelationshipType): Set {
+        return relConstraints[relationshipType.name()] ?: emptySet()
+    }
+
+    fun allForLabels(): Map> {
+        return Collections.unmodifiableMap(nodeConstraints)
+    }
+
+    fun allForRelationshipType(): Map> {
+        return Collections.unmodifiableMap(relConstraints)
+    }
+
+
+}
\ No newline at end of file
diff --git a/extended/src/main/kotlin/apoc/kafka/producer/StreamsEventRouter.kt b/extended/src/main/kotlin/apoc/kafka/producer/StreamsEventRouter.kt
new file mode 100644
index 0000000000..e81577ee2b
--- /dev/null
+++ b/extended/src/main/kotlin/apoc/kafka/producer/StreamsEventRouter.kt
@@ -0,0 +1,32 @@
+package
/**
 * Builds the routing configurations of type [T] declared in [config] for one database.
 *
 * An entry is selected when its key starts with [routingPrefix] and, if [routingSuffix] is
 * given, does not end with it. Keys containing `.from.` are kept only when the text after
 * that marker equals [dbName]; keys without the marker are kept only when [dbName] is empty.
 *
 * @param config the flat key/value configuration
 * @param routingPrefix key prefix that identifies the routing entries to read
 * @param dbName database the entries must target; empty selects the entries without `.from.`
 * @param routingSuffix suffix of the companion key-strategy entries, or null when there are none
 * @param log forwarded to [RoutingConfigurationFactory.getRoutingConfiguration]
 * @return the routing configurations parsed from every selected entry
 * @throws IllegalArgumentException when [T] is neither [NodeRoutingConfiguration] nor
 *         [RelationshipRoutingConfiguration]
 */
private inline fun <reified T: RoutingConfiguration> filterMap(config: Map<String, String>, routingPrefix: String, dbName: String = "", routingSuffix: String? = null, log: Log? = null): List<T> {
    val entityType = when (T::class) {
        NodeRoutingConfiguration::class -> EntityType.node
        RelationshipRoutingConfiguration::class -> EntityType.relationship
        else -> throw IllegalArgumentException("The class must be an instance of RoutingConfiguration")
    }
    return config
            .filterKeys { key ->
                val startWithPrefixAndNotEndWithSuffix = key.startsWith(routingPrefix)
                        && (routingSuffix == null || !key.endsWith(routingSuffix))
                if (key.contains(StreamsRoutingConfigurationConstants.FROM)) {
                    val topicDbName = key.replace(routingPrefix, StringUtils.EMPTY)
                            .split(StreamsRoutingConfigurationConstants.FROM)[1]
                    startWithPrefixAndNotEndWithSuffix && topicDbName == dbName // for `from.` we compare the routing prefix and the db name
                } else {
                    // for the default db we only filter by routingPrefix
                    dbName == "" && startWithPrefixAndNotEndWithSuffix
                }
            }
            .flatMap { routingEntry ->
                val prefixAndTopic = routingEntry.key.split(StreamsRoutingConfigurationConstants.FROM)[0]

                // NOTE(review): the first entry whose key starts with this topic's prefix and ends
                // with the suffix wins, so a topic whose name is a prefix of another topic's name
                // may pick up that topic's strategy; confirm whether an exact match is intended.
                val keyStrategy = routingSuffix?.let { suffix ->
                    config.entries
                            .firstOrNull { candidate -> candidate.key.startsWith(prefixAndTopic) && candidate.key.endsWith(suffix) }
                            ?.value
                } ?: RelKeyStrategy.DEFAULT.toString().toLowerCase()

                RoutingConfigurationFactory
                        .getRoutingConfiguration(prefixAndTopic.replace(routingPrefix, StringUtils.EMPTY),
                                routingEntry.value, entityType, keyStrategy, log) as List<T>
            }
}

/** Configuration keys, prefixes and markers used to read the source routing settings. */
private object StreamsRoutingConfigurationConstants {
    const val NODE_ROUTING_KEY_PREFIX: String = "apoc.kafka.source.topic.nodes."
    const val REL_ROUTING_KEY_PREFIX: String = "apoc.kafka.source.topic.relationships."
    const val SCHEMA_POLLING_INTERVAL = "apoc.kafka.source.schema.polling.interval"
    const val FROM = ".from."
    const val KEY_STRATEGY_SUFFIX = ".key_strategy"
}

/**
 * Source-side routing settings of one database.
 *
 * @property enabled value computed by [StreamsConfig.isSourceEnabled]
 * @property proceduresEnabled value computed by [StreamsConfig.hasProceduresEnabled]
 * @property nodeRouting routing rules applied to node events
 * @property relRouting routing rules applied to relationship events
 * @property schemaPollingInterval value read from `apoc.kafka.source.schema.polling.interval`
 */
data class StreamsEventRouterConfiguration(val enabled: Boolean = StreamsConfig.SOURCE_ENABLED_VALUE,
                                           val proceduresEnabled: Boolean = StreamsConfig.PROCEDURES_ENABLED_VALUE,
                                           val nodeRouting: List<NodeRoutingConfiguration> = listOf(
                                               NodeRoutingConfiguration()
                                           ),
                                           val relRouting: List<RelationshipRoutingConfiguration> = listOf(
                                               RelationshipRoutingConfiguration()
                                           ),
                                           val schemaPollingInterval: Long = 300000) {

    /** Returns the topics of every node routing rule followed by those of every relationship routing rule. */
    fun allTopics(): List<String> {
        val nodeTopics = nodeRouting.map { it.topic }
        val relTopics = relRouting.map { it.topic }
        return nodeTopics + relTopics
    }

    companion object {

        /**
         * Reads the routing settings of database [dbName] from [streamsConfig].
         *
         * Entries qualified with `.from.<dbName>` are always read; when [isDefaultDb] is true the
         * unqualified entries are appended to them. When no node (or relationship) entry is
         * found, a single routing rule whose topic is [dbName] is used instead.
         *
         * @param streamsConfig the flat key/value configuration
         * @param dbName name of the database the configuration is built for
         * @param isDefaultDb whether the unqualified routing entries also apply to this database
         * @param log forwarded to the relationship routing parser
         */
        fun from(streamsConfig: Map<String, String>, dbName: String, isDefaultDb: Boolean, log: Log? = null): StreamsEventRouterConfiguration {
            var nodeRouting = filterMap<NodeRoutingConfiguration>(config = streamsConfig,
                    routingPrefix = StreamsRoutingConfigurationConstants.NODE_ROUTING_KEY_PREFIX,
                    dbName = dbName)
            var relRouting = filterMap<RelationshipRoutingConfiguration>(config = streamsConfig,
                    routingPrefix = StreamsRoutingConfigurationConstants.REL_ROUTING_KEY_PREFIX,
                    dbName = dbName,
                    routingSuffix = StreamsRoutingConfigurationConstants.KEY_STRATEGY_SUFFIX,
                    log = log)

            if (isDefaultDb) {
                nodeRouting += filterMap<NodeRoutingConfiguration>(config = streamsConfig,
                        routingPrefix = StreamsRoutingConfigurationConstants.NODE_ROUTING_KEY_PREFIX
                )
                relRouting += filterMap<RelationshipRoutingConfiguration>(config = streamsConfig,
                        routingPrefix = StreamsRoutingConfigurationConstants.REL_ROUTING_KEY_PREFIX,
                        routingSuffix = StreamsRoutingConfigurationConstants.KEY_STRATEGY_SUFFIX,
                        log = log)
            }

            val default = StreamsEventRouterConfiguration()
            return default.copy(
                    enabled = StreamsConfig.isSourceEnabled(streamsConfig, dbName),
                    proceduresEnabled = StreamsConfig.hasProceduresEnabled(streamsConfig, dbName),
                    nodeRouting = if (nodeRouting.isEmpty()) listOf(NodeRoutingConfiguration(topic = dbName)) else nodeRouting,
                    relRouting = if (relRouting.isEmpty()) listOf(RelationshipRoutingConfiguration(topic = dbName)) else relRouting,
                    schemaPollingInterval = streamsConfig.getOrDefault(StreamsRoutingConfigurationConstants.SCHEMA_POLLING_INTERVAL, default.schemaPollingInterval).toString().toLong()
            )
        }

    }
}
= null + + + fun shutdown() { +// val isShuttingDown = txHandler?.status() == StreamsPluginStatus.RUNNING +// if (isShuttingDown) { +// log.info("[Sink] Shutting down the Streams Source Module") +// } +// if (streamsEventRouterConfiguration?.enabled == true) { +// streamsConstraintsService?.close() + streamsEventRouter?.stop() + streamsEventRouter = null + PublishProcedures.unregister(db) +// txHandler?.stop() +// txHandler = null +// } +// if (isShuttingDown) { +// log.info("[Source] Shutdown of the Streams Source Module completed") +// } + } + + fun start(configMap: Map) { + lastConfig = KafkaConfiguration.create(configMap) +// streamsEventRouterConfiguration = StreamsEventRouterConfiguration.from(configMap, db.databaseName(), isDefaultDb = db.isDefaultDb(), log) + streamsEventRouter = StreamsEventRouterFactory.getStreamsEventRouter(configMap, db, log) +// streamsConstraintsService = StreamsConstraintsService(db, streamsEventRouterConfiguration!!.schemaPollingInterval) +// if (streamsEventRouterConfiguration?.enabled == true || streamsEventRouterConfiguration?.proceduresEnabled == true) { +// streamsConstraintsService!!.start() + streamsEventRouter!!.start() +// } +// txHandler = StreamsTransactionEventHandler(streamsEventRouter!!, db, streamsConstraintsService!!) 
+// if (streamsEventRouterConfiguration?.enabled == true) { + streamsEventRouter!!.printInvalidTopics() +// txHandler!!.start() +// } + PublishProcedures.register(db, streamsEventRouter!!/*, txHandler!!*/) + log.info("[Source] Streams Source module initialised") + } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/producer/StreamsTransactionEventHandler.kt b/extended/src/main/kotlin/apoc/kafka/producer/StreamsTransactionEventHandler.kt new file mode 100644 index 0000000000..a8cee43efe --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/producer/StreamsTransactionEventHandler.kt @@ -0,0 +1,366 @@ +//package apoc.kafka.producer +// +//import org.neo4j.graphdb.GraphDatabaseService +//import org.neo4j.graphdb.Transaction +//import kotlinx.coroutines.async +//import kotlinx.coroutines.runBlocking +//import org.neo4j.graphdb.event.TransactionData +//import org.neo4j.graphdb.event.TransactionEventListener +//import org.neo4j.kernel.internal.GraphDatabaseAPI +//import apoc.kafka.events.* +//import apoc.kafka.extensions.labelNames +//import apoc.kafka.extensions.registerTransactionEventListener +//import apoc.kafka.extensions.unregisterTransactionEventListener +//import apoc.kafka.producer.events.* +//import apoc.kafka.utils.KafkaUtil.getNodeKeys +//import java.net.InetAddress +//import java.util.concurrent.atomic.AtomicInteger +//import java.util.concurrent.atomic.AtomicReference +// +// +//class StreamsTransactionEventHandler(private val router: StreamsEventRouter, +// private val db: GraphDatabaseAPI, +// //private val streamsConstraintsService: StreamsConstraintsService +//) +// : TransactionEventListener { +// +// private val status = AtomicReference(StreamsPluginStatus.UNKNOWN) +// +// fun start() { +// db.registerTransactionEventListener(this) +// status.set(StreamsPluginStatus.RUNNING) +// } +// +// fun stop() { +// db.unregisterTransactionEventListener(this) +// status.set(StreamsPluginStatus.STOPPED) +// } +// +// fun status() = 
status.get() +// +// private val configuration = router.eventRouterConfiguration +// +// private val nodeRoutingLabels = configuration.nodeRouting +// .flatMap { it.labels } +// private val relRoutingTypesAndStrategies = configuration.relRouting +// .map { it.name to it.relKeyStrategy }.toMap() +// +// private val nodeAll = configuration.nodeRouting.any { it.labels.isEmpty() } +// private val relAll = configuration.relRouting.any { it.name.isNullOrBlank() } +// +// // As getting host name in some network configuration can be expensive +// // this can lead to slowness in the start-up process (i.e. slowing the leader +// // election in case of a Causal Cluster). We define it a `lazy` value +// // computing it at the first invocation +// private val hostName by lazy { InetAddress.getLocalHost().hostName } +// +// /** +// * Wrap the payload into a StreamsTransactionEvent for the eventId +// */ +// private fun payloadToEvent(operation: OperationType, payload: Payload, schema: Schema, txd: TransactionData, eventId: Int, eventCount: Int) : StreamsTransactionEvent{ +// val meta = StreamsEventMetaBuilder() +// .withOperation(operation) +// .withTransactionEventId(eventId) +// .withTransactionEventsCount(eventCount) +// .withUsername(txd.username()) +// .withTimestamp(txd.commitTime) +// .withTransactionId(txd.transactionId) +// .withHostname(hostName) +// .build() +// +// val builder = StreamsTransactionEventBuilder() +// .withMeta(meta) +// .withPayload(payload) +// .withSchema(schema) +// +// return builder.build() +// } +// +// private fun mapToStreamsEvent(operation: OperationType, payloads: List, txd: TransactionData, totalEventsCount: Int, accumulator: AtomicInteger, +// nodeConstraints: Map>, relConstraints: Map>) : List { +// +// val getNodeConstraintsByLabels: (Collection?) 
-> Set = { labels -> +// labels.orEmpty() +// .flatMap { label -> nodeConstraints[label].orEmpty() } +// .toSet() +// } +// +// return payloads.map { payload -> +// accumulator.incrementAndGet() +// val schema = if (payload is NodePayload) { +// val constraints = getNodeConstraintsByLabels((payload.after ?: payload.before)!!.labels) +// SchemaBuilder() +// .withPayload(payload) +// .withConstraints(constraints) +// .build() +// } else { +// val relationshipPayload = (payload as RelationshipPayload) +// val relType = relationshipPayload.label +// val constraints = (relConstraints[relType].orEmpty() +// + getNodeConstraintsByLabels(relationshipPayload.start.labels) +// + getNodeConstraintsByLabels(relationshipPayload.end.labels)) +// SchemaBuilder() +// .withPayload(payload) +// .withConstraints(constraints) +// .build() +// } +// payloadToEvent(operation, payload, schema, txd, accumulator.get(), totalEventsCount) +// } +// } +// +// private fun allOrFiltered(iterable: Iterable, +// all: Boolean, +// predicate: (T) -> Boolean): Iterable = when (all) { +// true -> iterable +// else -> iterable.filter(predicate) +// } +// +// private fun buildNodeChanges(txd: TransactionData, builder: PreviousTransactionDataBuilder): PreviousTransactionDataBuilder { +// val createdPayload = allOrFiltered(txd.createdNodes(), nodeAll) +// { it.labelNames().any { nodeRoutingLabels.contains(it) } } +// .map { +// val labels = it.labelNames() +// +// val afterNode = NodeChangeBuilder() +// .withLabels(labels) +// .withProperties(it.allProperties) +// .build() +// +// val payload = NodePayloadBuilder() +// .withId(it.id.toString()) +// .withAfter(afterNode) +// .build() +// +// it.id.toString() to payload +// } +// .toMap() +// +// // returns a Map> where the K is true if the node has been deleted +// val removedNodeProps = txd.removedNodeProperties() +// .map { txd.deletedNodes().contains(it.entity()) to it } +// .groupBy({ it.first }, { it.second }) +// .toMap() +// val removedLbls = 
txd.removedLabels() +// .map { txd.deletedNodes().contains(it.node()) to it } +// .groupBy({ it.first }, { it.second }) +// .toMap() +// +// // labels and properties of deleted nodes are unreachable +// val deletedNodeProperties = removedNodeProps.getOrDefault(true, emptyList()) +// .map { it.entity().id to (it.key() to it.previouslyCommittedValue()) } +// .groupBy({ it.first },{ it.second }) // { nodeId -> [(k,v)] } +// .mapValues { it.value.toMap() } +// +// val deletedLabels = removedLbls.getOrDefault(true, emptyList()) +// .map { labelEntry -> labelEntry.node().id to labelEntry.label().name() } // [ (nodeId, [label]) ] +// .groupBy({it.first},{it.second}) // { nodeId -> [label] } +// +// val removedNodeProperties = removedNodeProps.getOrDefault(false, emptyList()) +// val removedLabels = removedLbls.getOrDefault(false, emptyList()) +// +// val deletedPayload = txd.deletedNodes() +// .map { +// val beforeNode = NodeChangeBuilder() +// .withLabels(deletedLabels.getOrDefault(it.id, emptyList())) +// .withProperties(deletedNodeProperties.getOrDefault(it.id, emptyMap())) +// .build() +// +// val payload = NodePayloadBuilder() +// .withId(it.id.toString()) +// .withBefore(beforeNode) +// .build() +// +// it.id.toString() to payload +// } +// .toMap() +// +// //don't change the order of the with methods +// return builder.withLabels(txd.assignedLabels(),removedLabels) +// .withNodeProperties(txd.assignedNodeProperties(),removedNodeProperties) +// .withNodeCreatedPayloads(createdPayload) +// .withNodeDeletedPayloads(deletedPayload) +// .withDeletedLabels(deletedLabels) +// } +// +// private fun buildRelationshipChanges(txd: TransactionData, builder: PreviousTransactionDataBuilder, nodeConstraints: Map>): PreviousTransactionDataBuilder { +// // returns a Map> where the K is true if the node has been deleted +// val removeRelProps = allOrFiltered(txd.removedRelationshipProperties(), relAll) +// { relRoutingTypesAndStrategies.containsKey(it.entity().type.name()) } +// 
.map { txd.deletedRelationships().contains(it.entity()) to it } +// .groupBy({ it.first }, { it.second }) +// .toMap() +// +// val deletedRelProperties = removeRelProps.getOrDefault(true, emptyList()) +// .map { it.entity().id to (it.key() to it.previouslyCommittedValue()) } +// .groupBy({ it.first }, { it.second }) // { nodeId -> [(k,v)] } +// .mapValues { it.value.toMap() } +// +// val nodeConstraintsCache = mutableMapOf, List>() +// val filterNodeConstraintCache : (List) -> List = { startLabels -> +// nodeConstraintsCache.computeIfAbsent(startLabels) { +// nodeConstraints +// .filterKeys { startLabels.contains(it) } +// .values +// .flatten() +// } +// } +// +// val createdRelPayload = allOrFiltered(txd.createdRelationships(), relAll) +// { relRoutingTypesAndStrategies.containsKey(it.type.name()) } +// .map { +// val afterRel = RelationshipChangeBuilder() +// .withProperties(it.allProperties) +// .build() +// +// val relKeyStrategy = relRoutingTypesAndStrategies.getOrDefault(it.type.name(), RelKeyStrategy.DEFAULT) +// +// val startLabels = it.startNode.labelNames() +// val startNodeConstraints = filterNodeConstraintCache(startLabels) +// val startKeys = getNodeKeys(startLabels, it.startNode.propertyKeys.toSet(), startNodeConstraints, relKeyStrategy) +// .toTypedArray() +// +// val endLabels = it.endNode.labelNames() +// val endNodeConstraints = filterNodeConstraintCache(endLabels) +// val endKeys = getNodeKeys(endLabels, it.endNode.propertyKeys.toSet(), endNodeConstraints, relKeyStrategy) +// .toTypedArray() +// +// val payload = RelationshipPayloadBuilder() +// .withId(it.id.toString()) +// .withName(it.type.name()) +// .withStartNode(it.startNode.id.toString(), startLabels, it.startNode.getProperties(*startKeys)) +// .withEndNode(it.endNode.id.toString(), endLabels, it.endNode.getProperties(*endKeys)) +// .withAfter(afterRel) +// .build() +// +// it.id.toString() to payload +// } +// .toMap() +// +// val deletedRelPayload = 
allOrFiltered(txd.deletedRelationships(), relAll) +// { relRoutingTypesAndStrategies.containsKey(it.type.name()) } +// .map { +// val beforeRel = RelationshipChangeBuilder() +// .withProperties(deletedRelProperties.getOrDefault(it.id, emptyMap())) +// .build() +// +// // start and end can be unreachable in case of detach delete +// val isStartNodeDeleted = txd.isDeleted(it.startNode) +// val isEndNodeDeleted = txd.isDeleted(it.endNode) +// +// val startNodeLabels = if (isStartNodeDeleted) builder.deletedLabels(it.startNode.id) else it.startNode.labelNames() +// val endNodeLabels = if (isEndNodeDeleted) builder.deletedLabels(it.endNode.id) else it.endNode.labelNames() +// +// val startPropertyKeys = if (isStartNodeDeleted) { +// builder.nodeDeletedPayload(it.startNodeId)?.before?.properties?.keys.orEmpty() +// } else { +// it.startNode.propertyKeys +// } +// +// val endPropertyKeys = if (isEndNodeDeleted) { +// builder.nodeDeletedPayload(it.endNodeId)?.before?.properties?.keys.orEmpty() +// } else { +// it.endNode.propertyKeys +// } +// val relKeyStrategy = relRoutingTypesAndStrategies.getOrDefault(it.type.name(), RelKeyStrategy.DEFAULT) +// +// val startNodeConstraints = filterNodeConstraintCache(startNodeLabels) +// val startKeys = getNodeKeys(startNodeLabels, startPropertyKeys.toSet(), startNodeConstraints, relKeyStrategy) +// +// val endNodeConstraints = filterNodeConstraintCache(endNodeLabels) +// val endKeys = getNodeKeys(endNodeLabels, endPropertyKeys.toSet(), endNodeConstraints, relKeyStrategy) +// +// val startProperties = if (isStartNodeDeleted) { +// val payload = builder.nodeDeletedPayload(it.startNode.id)!! +// (payload.after ?: payload.before)?.properties?.filterKeys { startKeys.contains(it) }.orEmpty() +// } else { +// it.startNode.getProperties(*startKeys.toTypedArray()) +// } +// val endProperties = if (isEndNodeDeleted) { +// val payload = builder.nodeDeletedPayload(it.endNode.id)!! 
+// (payload.after ?: payload.before)?.properties?.filterKeys { endKeys.contains(it) }.orEmpty() +// } else { +// it.endNode.getProperties(*endKeys.toTypedArray()) +// } +// +// val payload = RelationshipPayloadBuilder() +// .withId(it.id.toString()) +// .withName(it.type.name()) +// .withStartNode(it.startNode.id.toString(), startNodeLabels, startProperties) +// .withEndNode(it.endNode.id.toString(), endNodeLabels, endProperties) +// .withBefore(beforeRel) +// .build() +// +// it.id.toString() to payload +// } +// .toMap() +// +// val removedRelsProperties = removeRelProps.getOrDefault(false, emptyList()) +// +// //don't change the order of the with methods +// return builder.withRelProperties(txd.assignedRelationshipProperties(), removedRelsProperties) +// .withRelCreatedPayloads(createdRelPayload) +// .withRelDeletedPayloads(deletedRelPayload) +// .withRelRoutingTypesAndStrategies(relRoutingTypesAndStrategies) +// } +// +// override fun afterRollback(p0: TransactionData?, p1: PreviousTransactionData?, db: GraphDatabaseService?) {} +// +// override fun afterCommit(txd: TransactionData, previousTxd: PreviousTransactionData, db: GraphDatabaseService?) 
= runBlocking { +// val nodePrevious = previousTxd.nodeData +// val relPrevious = previousTxd.relData +// +// val totalEventsCount = nodePrevious.createdPayload.size + nodePrevious.deletedPayload.size + nodePrevious.updatedPayloads.size + +// relPrevious.createdPayload.size + relPrevious.deletedPayload.size + relPrevious.updatedPayloads.size +// +// if (totalEventsCount == 0) { +// return@runBlocking +// } +// +// val eventAcc = AtomicInteger(-1) +// val events = mutableListOf() +// val nodeCreated = async { mapToStreamsEvent(OperationType.created, nodePrevious.createdPayload, txd, totalEventsCount, eventAcc, +// previousTxd.nodeConstraints, previousTxd.relConstraints) } +// val nodeDeleted = async { mapToStreamsEvent(OperationType.deleted, nodePrevious.deletedPayload, txd, totalEventsCount, eventAcc, +// previousTxd.nodeConstraints, previousTxd.relConstraints) } +// val nodeUpdated = async { mapToStreamsEvent(OperationType.updated, nodePrevious.updatedPayloads, txd, totalEventsCount, eventAcc, +// previousTxd.nodeConstraints, previousTxd.relConstraints) } +// val relCreated = async { mapToStreamsEvent(OperationType.created, relPrevious.createdPayload, txd, totalEventsCount, eventAcc, +// previousTxd.nodeConstraints, previousTxd.relConstraints) } +// val relDeleted = async { mapToStreamsEvent(OperationType.deleted, relPrevious.deletedPayload, txd, totalEventsCount, eventAcc, +// previousTxd.nodeConstraints, previousTxd.relConstraints) } +// val relUpdated = async { mapToStreamsEvent(OperationType.updated, relPrevious.updatedPayloads, txd, totalEventsCount, eventAcc, +// previousTxd.nodeConstraints, previousTxd.relConstraints) } +// events.addAll(nodeCreated.await()) +// events.addAll(nodeDeleted.await()) +// events.addAll(nodeUpdated.await()) +// events.addAll(relCreated.await()) +// events.addAll(relDeleted.await()) +// events.addAll(relUpdated.await()) +// +// val topicEventsMap = events.flatMap { event -> +// val map = when (event.payload.type) { +// 
EntityType.node -> NodeRoutingConfiguration.prepareEvent(event, configuration.nodeRouting) +// EntityType.relationship -> RelationshipRoutingConfiguration.prepareEvent(event, configuration.relRouting) +// } +// map.entries +// } +// .groupBy({ it.key }, { it.value }) +// +// topicEventsMap.forEach { +// router.sendEvents(it.key, it.value) +// } +// } +// +// override fun beforeCommit(txd: TransactionData, tx: Transaction?, db: GraphDatabaseService?): PreviousTransactionData { +//// val nodeConstraints = streamsConstraintsService.allForLabels() +//// val relConstraints = streamsConstraintsService.allForRelationshipType() +//// var builder = PreviousTransactionDataBuilder() +//// .withNodeConstraints(nodeConstraints) +//// .withRelConstraints(relConstraints) +//// +//// builder = buildNodeChanges(txd, builder) +//// builder = buildRelationshipChanges(txd, builder, nodeConstraints) +//// +//// return builder.build() +// } +//} diff --git a/extended/src/main/kotlin/apoc/kafka/producer/events/PreviousTransactionData.kt b/extended/src/main/kotlin/apoc/kafka/producer/events/PreviousTransactionData.kt new file mode 100644 index 0000000000..fe9ab224b3 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/producer/events/PreviousTransactionData.kt @@ -0,0 +1,294 @@ +package apoc.kafka.producer.events + +import apoc.kafka.events.* +import org.neo4j.graphdb.Node +import org.neo4j.graphdb.Relationship +import org.neo4j.graphdb.event.LabelEntry +import org.neo4j.graphdb.event.PropertyEntry +import apoc.kafka.extensions.labelNames +import apoc.kafka.utils.KafkaUtil.getNodeKeys + +data class PreviousNodeTransactionData(val nodeProperties: Map>, + val nodeLabels: Map>, + val updatedPayloads: List = emptyList(), + val createdPayload: List, + val deletedPayload: List) + +data class PreviousRelTransactionData(val relProperties: Map> = emptyMap(), + val updatedPayloads: List = emptyList(), + val createdPayload: List = emptyList(), + val deletedPayload: List = emptyList()) + +data 
/**
 * Build a data class containing the previous (before) state of the nodes/relationships
 * touched by a transaction.
 *
 * The `with*` methods record what changed in the transaction; [build] turns that into the
 * created/updated/deleted payloads carried by [PreviousTransactionData].
 *
 * NOTE(review): the generic type arguments were missing from the extracted source and are
 * restored here from how each value is used — confirm against the declarations in
 * `apoc.kafka.events`.
 */
class PreviousTransactionDataBuilder {

    //nodes
    private var nodeProperties: Map<Long, Map<String, Any>> = emptyMap()
    private var nodeLabels: Map<Long, List<String>> = emptyMap()
    private var updatedNodes: Set<Node> = emptySet()
    private var nodeCreatedPayload: Map<String, NodePayload> = emptyMap()
    private var nodeDeletedPayload: Map<String, NodePayload> = emptyMap()
    private var deletedLabels: Map<Long, List<String>> = emptyMap()

    //relationships
    private var relProperties: Map<Long, Map<String, Any>> = emptyMap()
    private var updatedRels: Set<Relationship> = emptySet()
    private var relCreatedPayload: Map<String, RelationshipPayload> = emptyMap()
    private var relDeletedPayload: Map<String, RelationshipPayload> = emptyMap()
    private var relRoutingTypesAndStrategies: Map<String, RelKeyStrategy> = emptyMap()

    private lateinit var nodeConstraints: Map<String, Set<Constraint>>
    private lateinit var relConstraints: Map<String, Set<Constraint>>

    fun withNodeConstraints(nodeConstraints: Map<String, Set<Constraint>>): PreviousTransactionDataBuilder {
        this.nodeConstraints = nodeConstraints
        return this
    }

    fun withRelConstraints(relConstraints: Map<String, Set<Constraint>>): PreviousTransactionDataBuilder {
        this.relConstraints = relConstraints
        return this
    }

    /**
     * Assembles the before/after payloads for every updated node and relationship.
     *
     * Both constraint maps must have been supplied first: they are `lateinit`, so reading
     * them unset fails.
     */
    fun build(): PreviousTransactionData {
        val createdNodeIds = nodeCreatedPayload.keys

        // Nodes created in this transaction already have a "created" payload; skip them here.
        val updatedPayloads = updatedNodes
            .filterNot { createdNodeIds.contains(it.id.toString()) }
            .map { node ->
                val beforeNode = NodeChangeBuilder()
                    .withLabels(nodeLabels.getOrDefault(node.id, node.labelNames()))
                    .withProperties(nodeProperties.getOrDefault(node.id, emptyMap()))
                    .build()

                val afterNode = NodeChangeBuilder()
                    .withLabels(node.labelNames())
                    .withProperties(node.allProperties)
                    .build()

                NodePayloadBuilder()
                    .withId(node.id.toString())
                    .withBefore(beforeNode)
                    .withAfter(afterNode)
                    .build()
            }

        val nodeData = PreviousNodeTransactionData(nodeProperties, nodeLabels,
            updatedPayloads, nodeCreatedPayload.values.toList(), nodeDeletedPayload.values.toList())

        // Created/deleted relationships already have their own payloads.
        val notUpdatedRels = (relCreatedPayload.keys + relDeletedPayload.keys).toSet()

        // Constraint lookup is cached per label list because many relationships share end nodes.
        val nodeConstraintsCache = mutableMapOf<List<String>, List<Constraint>>()
        fun constraintsFor(labels: List<String>): List<Constraint> =
            nodeConstraintsCache.getOrPut(labels) {
                nodeConstraints
                    .filterKeys { labels.contains(it) }
                    .flatMap { it.value }
            }

        val updatedRelPayloads = updatedRels
            .filterNot { notUpdatedRels.contains(it.id.toString()) }
            .map { rel ->
                val beforeRel = RelationshipChangeBuilder()
                    .withProperties(relProperties.getOrDefault(rel.id, emptyMap()))
                    .build()

                val afterRel = RelationshipChangeBuilder()
                    .withProperties(rel.allProperties)
                    .build()

                val relKeyStrategy = relRoutingTypesAndStrategies.getOrDefault(rel.type.name(),
                    RelKeyStrategy.DEFAULT
                )

                val startLabels = rel.startNode.labelNames()
                val startNodeKeys = getNodeKeys(startLabels, rel.startNode.propertyKeys.toSet(),
                    constraintsFor(startLabels), relKeyStrategy)
                    .toTypedArray()

                val endLabels = rel.endNode.labelNames()
                val endNodeKeys = getNodeKeys(endLabels, rel.endNode.propertyKeys.toSet(),
                    constraintsFor(endLabels), relKeyStrategy)
                    .toTypedArray()

                RelationshipPayloadBuilder()
                    .withId(rel.id.toString())
                    .withName(rel.type.name())
                    .withStartNode(rel.startNode.id.toString(), startLabels, rel.startNode.getProperties(*startNodeKeys))
                    .withEndNode(rel.endNode.id.toString(), endLabels, rel.endNode.getProperties(*endNodeKeys))
                    .withBefore(beforeRel)
                    .withAfter(afterRel)
                    .build()
            }

        val relData = PreviousRelTransactionData(createdPayload = this.relCreatedPayload.values.toList(),
            deletedPayload = this.relDeletedPayload.values.toList(),
            updatedPayloads = updatedRelPayloads)

        return PreviousTransactionData(nodeData = nodeData, relData = relData, nodeConstraints = nodeConstraints, relConstraints = relConstraints)
    }

    /**
     * Records the label changes of the transaction and derives each touched node's labels
     * as they were before the transaction.
     *
     * The previous labels of a node are its current labels, minus every label assigned in
     * this transaction, plus every label removed in it. The earlier implementation only
     * subtracted one assigned label per entry and concatenated the results, so a node with
     * several label changes ended up with assigned labels (and duplicates) in its "before"
     * state.
     */
    fun withLabels(assignedLabels: Iterable<LabelEntry>, removedLabels: Iterable<LabelEntry>): PreviousTransactionDataBuilder {
        val touchedNodes = (assignedLabels.map { it.node() } + removedLabels.map { it.node() })
            .associateBy { it.id }
        val assignedNames = assignedLabels.groupBy({ it.node().id }, { it.label().name() })
        val removedNames = removedLabels.groupBy({ it.node().id }, { it.label().name() })

        nodeLabels = touchedNodes.mapValues { (id, node) ->
            val assigned = assignedNames[id].orEmpty().toSet()
            node.labelNames().filterNot { it in assigned } + removedNames[id].orEmpty()
        }

        updatedNodes = updatedNodes + touchedNodes.values

        // Nodes that only changed labels keep their current properties as "before" state.
        // Entries already rewound by withNodeProperties must win, whatever the call order.
        val currentProps = updatedNodes
            .filterNot { nodeProperties.containsKey(it.id) }
            .associate { it.id to it.allProperties }

        nodeProperties = nodeProperties + currentProps

        return this
    }

    /** Records node property changes and the nodes' properties as they were before them. */
    fun withNodeProperties(assignedNodeProperties: Iterable<PropertyEntry<Node>>, removedNodeProperties: Iterable<PropertyEntry<Node>>): PreviousTransactionDataBuilder {
        val allProps = previousProperties(assignedNodeProperties, removedNodeProperties)

        updatedNodes = updatedNodes + assignedNodeProperties.map { it.entity() } + removedNodeProperties.map { it.entity() }
        nodeProperties = nodeProperties + allProps

        return this
    }

    fun withNodeCreatedPayloads(createdPayload: Map<String, NodePayload>): PreviousTransactionDataBuilder {
        this.nodeCreatedPayload = createdPayload
        return this
    }

    fun withNodeDeletedPayloads(deletedPayload: Map<String, NodePayload>): PreviousTransactionDataBuilder {
        this.nodeDeletedPayload = deletedPayload
        return this
    }

    fun withRelCreatedPayloads(createdPayload: Map<String, RelationshipPayload>): PreviousTransactionDataBuilder {
        this.relCreatedPayload = createdPayload
        return this
    }

    fun withRelRoutingTypesAndStrategies(relRoutingTypesAndStrategies: Map<String, RelKeyStrategy>): PreviousTransactionDataBuilder {
        this.relRoutingTypesAndStrategies = relRoutingTypesAndStrategies
        return this
    }

    fun withRelDeletedPayloads(deletedPayload: Map<String, RelationshipPayload>): PreviousTransactionDataBuilder {
        this.relDeletedPayload = deletedPayload
        return this
    }

    /** Records relationship property changes and the properties as they were before them. */
    fun withRelProperties(assignedRelProperties: Iterable<PropertyEntry<Relationship>>, removedRelProperties: Iterable<PropertyEntry<Relationship>>): PreviousTransactionDataBuilder {
        val allProps = previousProperties(assignedRelProperties, removedRelProperties)

        updatedRels = updatedRels + assignedRelProperties.map { it.entity() } + removedRelProperties.map { it.entity() }
        relProperties = relProperties + allProps

        return this
    }

    fun withDeletedLabels(deletedLabels: Map<Long, List<String>>): PreviousTransactionDataBuilder {
        this.deletedLabels = deletedLabels
        return this
    }

    fun deletedLabels(id: Long): List<String> {
        return this.deletedLabels.getOrDefault(id, emptyList())
    }

    fun nodeDeletedPayload(id: Long): NodePayload? {
        return this.nodeDeletedPayload[id.toString()]
    }

    /**
     * Rewinds the current properties of every entity touched by [assigned] or [removed] to
     * their pre-transaction values, keyed by entity id.
     *
     * Starting from a copy of the entity's current properties: a key assigned with no
     * previous value is dropped, and a key that was overwritten or removed gets its
     * previously committed value back. The copy is taken once per entity (lazily), not once
     * per property entry.
     */
    private fun <T : org.neo4j.graphdb.Entity> previousProperties(
        assigned: Iterable<PropertyEntry<T>>,
        removed: Iterable<PropertyEntry<T>>
    ): Map<Long, Map<String, Any>> {
        val allProps = mutableMapOf<Long, MutableMap<String, Any>>()
        fun propsOf(entry: PropertyEntry<T>): MutableMap<String, Any> =
            allProps.getOrPut(entry.entity().id) { entry.entity().allProperties.toMutableMap() }

        assigned.filter { it.previouslyCommittedValue() == null }
            .forEach { propsOf(it).remove(it.key()) }

        assigned.filter { it.previouslyCommittedValue() != null }
            .forEach { propsOf(it)[it.key()] = it.previouslyCommittedValue() }

        removed.forEach { propsOf(it)[it.key()] = it.previouslyCommittedValue() }

        return allProps
    }

}
// NOTE(review): type arguments below were missing from the extracted source and are
// restored from usage (label names are strings, properties are name -> value) — confirm.

/** Fluent builder for a [NodeChange]: the labels and properties of a node at one point in time. */
class NodeChangeBuilder {

    private var labels: List<String> = listOf()
    private var properties: Map<String, Any> = mapOf()

    fun withLabels(labels: List<String>): NodeChangeBuilder = apply { this.labels = labels }

    fun withProperties(properties: Map<String, Any>): NodeChangeBuilder = apply { this.properties = properties }

    fun build(): NodeChange = NodeChange(properties = properties, labels = labels)
}

/** Fluent builder for a [NodePayload]; `before`/`after` stay null unless set, the id defaults to "0". */
class NodePayloadBuilder {

    private var id: String = "0"
    private var after: NodeChange? = null
    private var before: NodeChange? = null

    fun withId(id: String): NodePayloadBuilder = apply { this.id = id }

    fun withBefore(before: NodeChange): NodePayloadBuilder = apply { this.before = before }

    fun withAfter(after: NodeChange): NodePayloadBuilder = apply { this.after = after }

    fun build(): NodePayload = NodePayload(id, before, after)
}

/** Fluent builder for a [RelationshipChange]: the properties of a relationship at one point in time. */
class RelationshipChangeBuilder {

    private var properties: Map<String, Any> = mapOf()

    fun withProperties(properties: Map<String, Any>): RelationshipChangeBuilder = apply { this.properties = properties }

    fun build(): RelationshipChange = RelationshipChange(properties = properties)
}
// NOTE(review): type arguments below were missing from the extracted source and are
// restored from usage — confirm against the declarations in `apoc.kafka.events`.

/**
 * Fluent builder for a [RelationshipPayload].
 *
 * [build] requires the name and both end nodes to have been set; it fails on the `!!`
 * assertions otherwise. `before`/`after` may stay null.
 */
class RelationshipPayloadBuilder {
    private var id: String = "0"
    private var after: RelationshipChange? = null
    private var before: RelationshipChange? = null
    private var name: String? = null
    private var startNode: RelationshipNodeChange? = null
    private var endNode: RelationshipNodeChange? = null

    fun withStartNode(id: String, labels: List<String>, ids: Map<String, Any>): RelationshipPayloadBuilder =
        apply { startNode = RelationshipNodeChange(id, labels, ids) }

    fun withEndNode(id: String, labels: List<String>, ids: Map<String, Any>): RelationshipPayloadBuilder =
        apply { endNode = RelationshipNodeChange(id, labels, ids) }

    fun withId(id: String): RelationshipPayloadBuilder = apply { this.id = id }

    fun withBefore(before: RelationshipChange): RelationshipPayloadBuilder = apply { this.before = before }

    fun withAfter(after: RelationshipChange): RelationshipPayloadBuilder = apply { this.after = after }

    fun withName(name: String): RelationshipPayloadBuilder = apply { this.name = name }

    fun build(): RelationshipPayload =
        RelationshipPayload(id = id, before = before, after = after, label = name!!, start = startNode!!, end = endNode!!)
}

/**
 * Builds the [Schema] of an event: the simple type name of every property plus the
 * constraints that apply. Both the payload and the constraints must be set before [build].
 */
class SchemaBuilder {

    private lateinit var payload: Payload
    private lateinit var constraints: Set<Constraint>

    fun withPayload(payload: Payload): SchemaBuilder = apply { this.payload = payload }

    fun withConstraints(constraints: Set<Constraint>): SchemaBuilder = apply { this.constraints = constraints }

    /** Maps each property name to its value's simple class name; arrays become `<component>[]`. */
    private fun mapPropertiesToTypes(properties: RecordChange?): Map<String, String> {
        val props = properties?.properties ?: return emptyMap()
        return props.mapValues { (_, value) ->
            val javaType = value::class.java
            if (javaType.isArray) "${javaType.componentType.simpleName}[]" else javaType.simpleName
        }
    }

    // The "after" state describes the schema when present; deletions only have "before".
    fun build(): Schema = Schema(mapPropertiesToTypes(payload.after ?: payload.before), constraints.toList())
}
/**
 * Fluent builder for a [StreamsTransactionEvent].
 *
 * Meta, payload and schema are all mandatory: [build] fails on the `!!` assertions when any
 * of them has not been set.
 */
class StreamsTransactionEventBuilder {

    private var meta: Meta? = null
    private var payload: Payload? = null
    private var schema: Schema? = null

    fun withMeta(meta: Meta): StreamsTransactionEventBuilder = apply { this.meta = meta }

    fun withPayload(payload: Payload): StreamsTransactionEventBuilder = apply { this.payload = payload }

    fun withSchema(schema: Schema): StreamsTransactionEventBuilder = apply { this.schema = schema }

    fun build(): StreamsTransactionEvent = StreamsTransactionEvent(meta!!, payload!!, schema!!)
}
/**
 * Keeps track of the topics that exist on the Kafka cluster.
 *
 * When the broker auto-creates topics, every topic is considered valid and no polling is
 * started. Otherwise [start] launches a background coroutine that refreshes the known topic
 * names every `topicDiscoveryPollingInterval` milliseconds until [stop] cancels it.
 *
 * NOTE(review): the type arguments of `allTopics` and `kafkaTopics` were missing from the
 * extracted source and are restored as topic-name strings — confirm.
 * NOTE(review): when auto-create is enabled no job is started, so nothing closes the admin
 * client; verify whether the owner is expected to do that.
 */
class KafkaAdminService(private val props: KafkaConfiguration, private val allTopics: List<String>, private val log: Log) {
    private val client = AdminClient.create(props.asProperties())
    private val kafkaTopics: MutableSet<String> = Collections.newSetFromMap(ConcurrentHashMap<String, Boolean>())
    private val isAutoCreateTopicsEnabled = isAutoCreateTopicsEnabled(client)
    private lateinit var job: Job

    /** Starts the topic polling loop, unless the broker auto-creates topics. */
    fun start() {
        if (!isAutoCreateTopicsEnabled) {
            job = GlobalScope.launch(Dispatchers.IO) {
                try {
                    while (isActive) {
                        try {
                            kafkaTopics += client.listTopics().names().get()
                        } catch (e: Exception) {
                            log.warn("""Cannot retrieve valid topics because the following exception,
                                |next attempt is in ${props.topicDiscoveryPollingInterval} ms:
                            """.trimMargin(), e)
                        }
                        delay(props.topicDiscoveryPollingInterval)
                    }
                } finally {
                    // Cancellation surfaces as an exception thrown by delay(), so code placed
                    // after the loop is skipped; closing in finally releases the client on stop().
                    client.close()
                }
            }
        }
    }

    /** Cancels the polling loop and waits for it to finish; a no-op if it was never started. */
    fun stop() {
        KafkaUtil.ignoreExceptions({
            runBlocking {
                job.cancelAndJoin()
            }
        }, UninitializedPropertyAccessException::class.java)
    }

    /** A topic is valid if the broker auto-creates topics or the last poll saw it. */
    fun isValidTopic(topic: String) = when (isAutoCreateTopicsEnabled) {
        true -> true
        else -> kafkaTopics.contains(topic)
    }

    fun getInvalidTopics() = getInvalidTopics(client, allTopics)
}
private val configPrefix = "apoc.kafka."

/**
 * Producer-side Kafka settings, read from configuration keys prefixed with `apoc.kafka.`.
 *
 * Keys that match one of the properties below (in dotted form, e.g. `batch.size`) populate
 * that property; every other prefixed key is kept in [extraProperties] and handed to the
 * Kafka client unchanged by [asProperties].
 *
 * NOTE(review): the `Map` type arguments were missing from the extracted source and are
 * restored as `Map<String, String>` — confirm.
 */
data class KafkaConfiguration(val bootstrapServers: String = "localhost:9092",
                              val acks: String = "1",
                              val retries: Int = 2,
                              val batchSize: Int = 16384,
                              val bufferMemory: Int = 33554432,
                              val reindexBatchSize: Int = 1000,
                              val sessionTimeoutMs: Int = 15 * 1000,
                              val connectionTimeoutMs: Int = 10 * 1000,
                              val replication: Int = 1,
                              val transactionalId: String = StringUtils.EMPTY,
                              val lingerMs: Int = 1,
                              val topicDiscoveryPollingInterval: Long = TimeUnit.MINUTES.toMillis(5),
                              val logCompactionStrategy: String = LogStrategy.delete.toString(),
                              val extraProperties: Map<String, String> = emptyMap()) {

    companion object {
        // Visible for testing
        fun create(cfg: Map<String, String>): KafkaConfiguration {
            val config = cfg.filterKeys { it.startsWith(configPrefix) }.mapKeys { it.key.substring(configPrefix.length) }

            val default = KafkaConfiguration()

            val keys = JSONUtils.asMap(default).keys.map { it.toPointCase() }
            val extraProperties = config.filterKeys { !keys.contains(it) }

            return default.copy(bootstrapServers = config.getOrDefault("bootstrap.servers", default.bootstrapServers),
                    acks = config.getOrDefault("acks", default.acks),
                    retries = config.getInt("retries", default.retries),
                    batchSize = config.getInt("batch.size", default.batchSize),
                    bufferMemory = config.getInt("buffer.memory", default.bufferMemory),
                    reindexBatchSize = config.getInt("reindex.batch.size", default.reindexBatchSize),
                    sessionTimeoutMs = config.getInt("session.timeout.ms", default.sessionTimeoutMs),
                    connectionTimeoutMs = config.getInt("connection.timeout.ms", default.connectionTimeoutMs),
                    replication = config.getInt("replication", default.replication),
                    transactionalId = config.getOrDefault("transactional.id", default.transactionalId),
                    lingerMs = config.getInt("linger.ms", default.lingerMs),
                    topicDiscoveryPollingInterval = config.getOrDefault("topic.discovery.polling.interval",
                            default.topicDiscoveryPollingInterval).toString().toLong(),
                    logCompactionStrategy = config.getOrDefault("log.compaction.strategy", default.logCompactionStrategy),
                    extraProperties = extraProperties // for what we don't provide a default configuration
            )
        }

        /** Builds the configuration from [cfg] and returns it after validation. */
        fun from(cfg: Map<String, String>, log: Log): KafkaConfiguration {
            val kafkaCfg = create(cfg)
            return validate(kafkaCfg, cfg, log)
        }

        /**
         * Checks the bootstrap servers and the log compaction strategy.
         *
         * Returns [config] itself when the strategy names a [LogStrategy]; otherwise logs a
         * warning and returns a copy that uses the `delete` strategy. The properties are
         * immutable, so the fallback has to be returned rather than applied in place.
         */
        private fun validate(config: KafkaConfiguration, rawConfig: Map<String, String>, log: Log? = null): KafkaConfiguration {
            validateConnection(config.bootstrapServers, CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, false)
            return try {
                LogStrategy.valueOf(config.logCompactionStrategy)
                config
            } catch (e: IllegalArgumentException) {
                log?.warn("Invalid log compaction strategy setting, switching to default value ${TopicConfig.CLEANUP_POLICY_DELETE}")
                config.copy(logCompactionStrategy = LogStrategy.delete.toString())
            }
        }

    }

    /**
     * Flattens this configuration into Kafka client [Properties]: the data-class properties
     * under their dotted names (an empty `transactionalId` is left out), then
     * [extraProperties], then the fixed byte-array serializers.
     */
    fun asProperties(): Properties {
        val props = Properties()
        val map = JSONUtils.asMap(this)
                .filter {
                    if (it.key == "transactionalId") {
                        it.value != StringUtils.EMPTY
                    } else {
                        true
                    }
                }
                .mapKeys { it.key.toPointCase() }
        props.putAll(map)
        props.putAll(extraProperties)
        props.putAll(addSerializers()) // Fixed serializers
        return props
    }

    private fun addSerializers() : Properties {
        val props = Properties()
        props[ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG] = ByteArraySerializer::class.java
        props[ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG] = ByteArraySerializer::class.java
        return props
    }

}
/**
 * [StreamsEventRouter] that publishes events to Kafka.
 *
 * The producer is created in [start] and dropped in [stop]; both run under [mutex] so that
 * concurrent calls cannot create or close it twice.
 *
 * NOTE(review): the generic type arguments in this class were missing from the extracted
 * source. They are restored from usage (byte-array records, string-keyed maps); the
 * overriding signatures must match `StreamsEventRouter` — confirm.
 */
class KafkaEventRouter(private val config: Map<String, String>,
                       private val db: GraphDatabaseService,
                       private val log: Log): StreamsEventRouter(config, db, log) {

    override val eventRouterConfiguration: StreamsEventRouterConfiguration = StreamsEventRouterConfiguration
        .from(config, db.databaseName(), db.isDefaultDb(), log)


    private val mutex = Mutex()

    private var producer: Neo4jKafkaProducer<ByteArray, ByteArray>? = null
    private val kafkaConfig by lazy { KafkaConfiguration.from(config, log) }
    private val kafkaAdminService by lazy { KafkaAdminService(kafkaConfig, eventRouterConfiguration.allTopics(), log) }

    override fun printInvalidTopics() {
        val invalidTopics = kafkaAdminService.getInvalidTopics()
        if (invalidTopics.isNotEmpty()) {
            log.warn(getInvalidTopicsError(invalidTopics))
        }
    }

    // The router is considered running exactly while it holds a producer.
    private fun status(producer: Neo4jKafkaProducer<*, *>?): StreamsPluginStatus = when (producer != null) {
        true -> StreamsPluginStatus.RUNNING
        else -> StreamsPluginStatus.STOPPED
    }

    /** Creates the producer and starts topic discovery; does nothing if already running. */
    override fun start() = runBlocking {
        mutex.withLock(producer) {
            if (status(producer) == StreamsPluginStatus.RUNNING) {
                return@runBlocking
            }
            log.info("Initialising Kafka Connector")
            kafkaAdminService.start()
            val props = kafkaConfig.asProperties()
            producer = Neo4jKafkaProducer(props)
            producer!!.initTransactions()
            log.info("Kafka Connector started")
        }
    }

    /** Flushes and closes the producer and stops topic discovery; does nothing if stopped. */
    override fun stop() = runBlocking {
        mutex.withLock(producer) {
            if (status(producer) == StreamsPluginStatus.STOPPED) {
                return@runBlocking
            }
            KafkaUtil.ignoreExceptions({ producer?.flush() }, UninitializedPropertyAccessException::class.java)
            KafkaUtil.ignoreExceptions({ producer?.close() }, UninitializedPropertyAccessException::class.java)
            KafkaUtil.ignoreExceptions({ kafkaAdminService.stop() }, UninitializedPropertyAccessException::class.java)
            producer = null
        }
    }

    /**
     * Sends one record. Records for topics the admin service does not consider valid are
     * dropped (and null is returned). With [sync] the call waits for the broker and returns
     * the record metadata as a map; otherwise it returns null immediately and the outcome is
     * only logged at debug level.
     */
    private fun send(producerRecord: ProducerRecord<ByteArray?, ByteArray?>, sync: Boolean = false): Map<String, Any>? {
        if (!kafkaAdminService.isValidTopic(producerRecord.topic())) {
            if (log.isDebugEnabled) {
                log.debug("Error while sending record to ${producerRecord.topic()}, because it doesn't exists")
            }
            // TODO add logging system here
            return null
        }
        return if (sync) {
            producer?.send(producerRecord)?.get()?.toMap()
        } else {
            producer?.send(producerRecord) { meta, error ->
                if (meta != null && log.isDebugEnabled) {
                    log.debug("Successfully sent record in partition ${meta.partition()} offset ${meta.offset()} data ${meta.topic()} key size ${meta.serializedKeySize()}")
                }
                if (error != null) {
                    if (log.isDebugEnabled) {
                        log.debug("Error while sending record to ${producerRecord.topic()}, because of the following exception:", error)
                    }
                    // TODO add logging system here
                }
            }
            null
        }
    }

    // this method is used by the procedures
    private fun sendEvent(topic: String, event: StreamsEvent, config: Map<String, Any?>, sync: Boolean = false): Map<String, Any>? {
        if (log.isDebugEnabled) {
            log.debug("Trying to send a simple event with payload ${event.payload} to kafka")
        }
        // in the procedures we allow to define a custom message key via the configuration property key
        // in order to have the backwards compatibility we define as default value the old key
        val key = config.getOrDefault("key", UUID.randomUUID().toString())
        val partition = (config["partition"])?.toString()?.toInt()

        val producerRecord = ProducerRecord(topic, partition, System.currentTimeMillis(), key?.let { JSONUtils.writeValueAsBytes(it) },
            JSONUtils.writeValueAsBytes(event))
        return send(producerRecord, sync)
    }

    // this method is used by the transaction event handler
    private fun sendEvent(topic: String, event: StreamsTransactionEvent, config: Map<String, Any?>) {
        if (log.isDebugEnabled) {
            log.debug("Trying to send a transaction event with txId ${event.meta.txId} and txEventId ${event.meta.txEventId} to kafka")
        }
        val key = JSONUtils.writeValueAsBytes(event.asSourceRecordKey(kafkaConfig.logCompactionStrategy))
        val value = event.asSourceRecordValue(kafkaConfig.logCompactionStrategy)?.let { JSONUtils.writeValueAsBytes(it) }

        val producerRecord = ProducerRecord(topic, null, System.currentTimeMillis(), key, value)
        send(producerRecord)
    }

    /**
     * Sends the events synchronously inside one Kafka transaction and returns the metadata
     * of every record that was actually sent.
     *
     * If a send or the commit fails, the transaction is aborted (best effort) before the
     * original exception is rethrown; otherwise the producer would stay inside the open
     * transaction and the next call could not begin a new one.
     */
    override fun sendEventsSync(topic: String, transactionEvents: List<out StreamsEvent>, config: Map<String, Any?>): List<Map<String, Any>> {
        producer?.beginTransaction()
        try {
            val results = transactionEvents.mapNotNull {
                sendEvent(topic, it, config, true)
            }
            producer?.commitTransaction()
            return results
        } catch (e: Exception) {
            try {
                producer?.abortTransaction()
            } catch (abortError: Exception) {
                // Abort is not possible after a fatal producer error; keep the root cause.
                e.addSuppressed(abortError)
            }
            throw e
        }
    }

    /**
     * Sends the events asynchronously inside one Kafka transaction. Failures are logged, not
     * rethrown: fatal producer errors close the producer, any other Kafka error aborts the
     * transaction.
     */
    override fun sendEvents(topic: String, transactionEvents: List<out StreamsEvent>, config: Map<String, Any?>) {
        try {
            producer?.beginTransaction()
            transactionEvents.forEach {
                // The smart cast selects the transaction-event overload of sendEvent.
                if (it is StreamsTransactionEvent) {
                    sendEvent(topic, it, config)
                } else {
                    sendEvent(topic, it, config)
                }
            }
            producer?.commitTransaction()
        } catch (e: ProducerFencedException) {
            log.error("Another producer with the same transactional.id has been started. Stack trace is:", e)
            producer?.close()
        } catch (e: OutOfOrderSequenceException) {
            log.error("The broker received an unexpected sequence number from the producer. Stack trace is:", e)
            producer?.close()
        } catch (e: AuthorizationException) {
            log.error("Error in authorization. Stack trace is:", e)
            producer?.close()
        } catch (e: KafkaException) {
            log.error("Generic kafka error. Stack trace is:", e)
            producer?.abortTransaction()
        }
    }

}

/**
 * [KafkaProducer] whose transactional calls are no-ops unless a `transactional.id` was
 * configured, so callers can use the transactional API unconditionally.
 */
class Neo4jKafkaProducer<K, V>: KafkaProducer<K, V> {
    private val isTransactionEnabled: Boolean
    constructor(props: Properties): super(props) {
        isTransactionEnabled = props.containsKey("transactional.id")
    }

    override fun initTransactions() {
        if (isTransactionEnabled) {
            super.initTransactions()
        }
    }

    override fun beginTransaction() {
        if (isTransactionEnabled) {
            super.beginTransaction()
        }
    }

    override fun commitTransaction() {
        if (isTransactionEnabled) {
            super.commitTransaction()
        }
    }

    override fun abortTransaction() {
        if (isTransactionEnabled) {
            super.abortTransaction()
        }
    }

}
@Context +// var db: GraphDatabaseService? = null +// +// @JvmField @Context var log: Log? = null +// +// @Procedure(mode = Mode.READ, name = "apoc.kafka.publish.sync") +// @Description("apoc.kafka.publish.sync(topic, payload, config) - Allows custom synchronous streaming from Neo4j to the configured stream environment") +// fun sync(@Name("topic") topic: String?, @Name("payload") payload: Any?, +// @Name(value = "config", defaultValue = "{}") config: Map?): Stream { +// checkEnabled() +// if (isTopicNullOrEmpty(topic)) { +// return Stream.empty() +// } +// checkPayloadNotNull(payload) +// +// val streamsEvent = buildStreamEvent(topic!!, payload!!) +// return getStreamsEventSinkStoreEntry().eventRouter +// .sendEventsSync(topic, listOf(streamsEvent), config ?: emptyMap()) +// .map { StreamPublishResult(it) } +// .stream() +// } +// +// @Procedure(mode = Mode.READ, name = "apoc.kafka.publish") +// @Description("apoc.kafka.publish(topic, payload, config) - Allows custom streaming from Neo4j to the configured stream environment") +// fun publish(@Name("topic") topic: String?, @Name("payload") payload: Any?, +// @Name(value = "config", defaultValue = "{}") config: Map?) = runBlocking { +// checkEnabled() +// if (isTopicNullOrEmpty(topic)) { +// return@runBlocking +// } +// checkPayloadNotNull(payload) +// +// val streamsEvent = buildStreamEvent(topic!!, payload!!) 
+// getStreamsEventSinkStoreEntry().eventRouter.sendEvents(topic, listOf(streamsEvent), config ?: emptyMap()) +// } +// +// private fun checkEnabled() { +// if (!getStreamsEventSinkStoreEntry().eventRouter.eventRouterConfiguration.proceduresEnabled) { +// throw RuntimeException("In order to use the procedure you must set apoc.kafka.procedures.enabled=true") +// } +// } +// +// private fun isTopicNullOrEmpty(topic: String?): Boolean { +// return if (topic.isNullOrEmpty()) { +// log?.info("Topic empty, no message sent") +// true +// } else { +// false +// } +// } +// +// private fun checkPayloadNotNull(payload: Any?) { +// if (payload == null) { +// log?.error("Payload empty, no message sent") +// throw RuntimeException("Payload may not be null") +// } +// } +// +// private fun buildStreamEvent(topic: String, payload: Any) = StreamsEventBuilder() +// .withPayload(payload) +// .withNodeRoutingConfiguration(getStreamsEventSinkStoreEntry() +// .eventRouter +// .eventRouterConfiguration +// .nodeRouting +// .firstOrNull { it.topic == topic }) +// .withRelationshipRoutingConfiguration(getStreamsEventSinkStoreEntry() +// .eventRouter +// .eventRouterConfiguration +// .relRouting +// .firstOrNull { it.topic == topic }) +// .withTopic(topic) +// .build() +// +// private fun getStreamsEventSinkStoreEntry() = streamsEventRouterStore[db!!.databaseName()]!! 
+// +// companion object { +// +// private val streamsEventRouterStore = ConcurrentHashMap() +// +// fun register( +// db: GraphDatabaseAPI, +// evtRouter: StreamsEventRouter, +// txHandler: StreamsTransactionEventHandler +// ) { +// streamsEventRouterStore[StreamsUtils.getName(db)] = StreamsEventSinkStoreEntry(evtRouter, txHandler) +// } +// +// fun unregister(db: GraphDatabaseAPI) { +// streamsEventRouterStore.remove(StreamsUtils.getName(db)) +// } +// } +//} diff --git a/extended/src/main/kotlin/apoc/kafka/service/StreamsSinkService.kt b/extended/src/main/kotlin/apoc/kafka/service/StreamsSinkService.kt new file mode 100644 index 0000000000..80343b6f74 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/service/StreamsSinkService.kt @@ -0,0 +1,42 @@ +package apoc.kafka.service + +import apoc.kafka.service.sink.strategy.IngestionStrategy + + +const val STREAMS_TOPIC_KEY: String = "apoc.kafka.sink.topic" +const val STREAMS_TOPIC_CDC_KEY: String = "apoc.kafka.sink.topic.cdc" + +enum class TopicTypeGroup { CYPHER, CDC, PATTERN, CUD } +enum class TopicType(val group: TopicTypeGroup, val key: String) { + CDC_SOURCE_ID(group = TopicTypeGroup.CDC, key = "$STREAMS_TOPIC_CDC_KEY.sourceId"), + CYPHER(group = TopicTypeGroup.CYPHER, key = "$STREAMS_TOPIC_KEY.cypher"), + PATTERN_NODE(group = TopicTypeGroup.PATTERN, key = "$STREAMS_TOPIC_KEY.pattern.node"), + PATTERN_RELATIONSHIP(group = TopicTypeGroup.PATTERN, key = "$STREAMS_TOPIC_KEY.pattern.relationship"), + CDC_SCHEMA(group = TopicTypeGroup.CDC, key = "$STREAMS_TOPIC_CDC_KEY.schema"), + CUD(group = TopicTypeGroup.CUD, key = "$STREAMS_TOPIC_KEY.cud") +} + +data class StreamsSinkEntity(val key: Any?, val value: Any?) + +abstract class StreamsStrategyStorage { + abstract fun getTopicType(topic: String): TopicType? 
+ + abstract fun getStrategy(topic: String): IngestionStrategy +} + +/** Base class for sink writers: looks up the [IngestionStrategy] for a topic through the supplied [StreamsStrategyStorage] and passes every query/events pair that strategy produces to [write]. */ abstract class StreamsSinkService(private val streamsStrategyStorage: StreamsStrategyStorage) { + + /** Receives one query together with the events that belong to it; the implementation is left to subclasses. */ abstract fun write(query: String, events: Collection) + + /* Calls write() in a fixed order: node merges, node deletes, relationship merges, relationship deletes. */ private fun writeWithStrategy(data: Collection, strategy: IngestionStrategy) { + strategy.mergeNodeEvents(data).forEach { write(it.query, it.events) } + strategy.deleteNodeEvents(data).forEach { write(it.query, it.events) } + + strategy.mergeRelationshipEvents(data).forEach { write(it.query, it.events) } + strategy.deleteRelationshipEvents(data).forEach { write(it.query, it.events) } + } + + /** Writes [params] using the strategy that the storage returns for [topic]. */ fun writeForTopic(topic: String, params: Collection) { + writeWithStrategy(params, streamsStrategyStorage.getStrategy(topic)) + } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/service/Topics.kt b/extended/src/main/kotlin/apoc/kafka/service/Topics.kt new file mode 100644 index 0000000000..30009730db --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/service/Topics.kt @@ -0,0 +1,127 @@ +package apoc.kafka.service + +import apoc.kafka.service.sink.strategy.* +import kotlin.reflect.jvm.javaType + +class TopicValidationException(message: String): RuntimeException(message) + +private fun TopicType.replaceKeyBy(replacePrefix: Pair) = if (replacePrefix.first.isNullOrBlank()) + this.key + else + this.key.replace(replacePrefix.first, replacePrefix.second) + +data class Topics(val cypherTopics: Map = emptyMap(), + val cdcSourceIdTopics: Set = emptySet(), + val cdcSchemaTopics: Set = emptySet(), + val cudTopics: Set = emptySet(), + val nodePatternTopics: Map = emptyMap(), + val relPatternTopics: Map = emptyMap(), + val invalid: List = emptyList()) { + + operator fun plus(other: Topics): Topics { + return Topics(cypherTopics = this.cypherTopics + other.cypherTopics, + cdcSourceIdTopics = this.cdcSourceIdTopics + other.cdcSourceIdTopics, + cdcSchemaTopics = this.cdcSchemaTopics + other.cdcSchemaTopics, + cudTopics = this.cudTopics
+ other.cudTopics, + nodePatternTopics = this.nodePatternTopics + other.nodePatternTopics, + relPatternTopics = this.relPatternTopics + other.relPatternTopics, + invalid = this.invalid + other.invalid) + } + + fun allTopics(): List = this.asMap() + .map { + if (it.key.group == TopicTypeGroup.CDC || it.key.group == TopicTypeGroup.CUD) { + (it.value as Set).toList() + } else { + (it.value as Map).keys.toList() + } + } + .flatten() + + fun asMap(): Map = mapOf(TopicType.CYPHER to cypherTopics, TopicType.CUD to cudTopics, + TopicType.CDC_SCHEMA to cdcSchemaTopics, TopicType.CDC_SOURCE_ID to cdcSourceIdTopics, + TopicType.PATTERN_NODE to nodePatternTopics, TopicType.PATTERN_RELATIONSHIP to relPatternTopics) + + companion object { + /** Builds a [Topics] from a flat configuration map. With a non-blank [dbName] only keys ending in `.to.<dbName>` (compared case-insensitively) are used, and occurrences of that marker are removed from the key; with a blank [dbName] keys containing `.to.` are ignored. [replacePrefix] is applied to every [TopicType] key through replaceKeyBy before the lookup. [invalidTopics] is forwarded to the TopicUtils helpers for the pattern, CDC and CUD topics (not for the cypher topics). */ fun from(map: Map, replacePrefix: Pair = ("" to ""), dbName: String = "", invalidTopics: List = emptyList()): Topics { + val config = map + .filterKeys { /* ignoreCase keeps this test consistent with the case-insensitive replace on the next step, so a dbName with upper-case letters still matches */ if (dbName.isNotBlank()) it.endsWith(".to.$dbName", ignoreCase = true) else !it.contains(".to.") } + .mapKeys { if (dbName.isNotBlank()) it.key.replace(".to.$dbName", "", true) else it.key } + val cypherTopicPrefix = TopicType.CYPHER.replaceKeyBy(replacePrefix) + val sourceIdKey = TopicType.CDC_SOURCE_ID.replaceKeyBy(replacePrefix) + val schemaKey = TopicType.CDC_SCHEMA.replaceKeyBy(replacePrefix) + val cudKey = TopicType.CUD.replaceKeyBy(replacePrefix) + val nodePatterKey = TopicType.PATTERN_NODE.replaceKeyBy(replacePrefix) + val relPatterKey = TopicType.PATTERN_RELATIONSHIP.replaceKeyBy(replacePrefix) + val cypherTopics = TopicUtils.filterByPrefix(config, cypherTopicPrefix) + val nodePatternTopics = TopicUtils + .filterByPrefix(config, nodePatterKey, invalidTopics) + .mapValues { NodePatternConfiguration.parse(it.value) } + val relPatternTopics = TopicUtils + .filterByPrefix(config, relPatterKey, invalidTopics) + .mapValues { RelationshipPatternConfiguration.parse(it.value) } + val cdcSourceIdTopics = TopicUtils.splitTopics(config[sourceIdKey] as?
String, invalidTopics) + val cdcSchemaTopics = TopicUtils.splitTopics(config[schemaKey] as? String, invalidTopics) + val cudTopics = TopicUtils.splitTopics(config[cudKey] as? String, invalidTopics) + return Topics(cypherTopics, cdcSourceIdTopics, cdcSchemaTopics, cudTopics, nodePatternTopics, relPatternTopics) + } + } +} + +object TopicUtils { + + @JvmStatic val TOPIC_SEPARATOR = ";" + + /** Returns the entries of [config] whose key starts with "[prefix].", re-keyed by the remainder of the key (the topic name) and with values converted via toString(); topic names listed in [invalidTopics] are dropped. */ fun filterByPrefix(config: Map<*, *>, prefix: String, invalidTopics: List = emptyList()): Map { + val fullPrefix = "$prefix." + return config + .filterKeys { it.toString().startsWith(fullPrefix) } + .mapKeys { /* strip only the leading prefix; replace() would also delete later occurrences inside the topic name */ it.key.toString().removePrefix(fullPrefix) } + .filterKeys { !invalidTopics.contains(it) } + .mapValues { it.value.toString() } + } + + fun splitTopics(cdcMergeTopicsString: String?, invalidTopics: List = emptyList()): Set { + return if (cdcMergeTopicsString.isNullOrBlank()) { + emptySet() + } else { + cdcMergeTopicsString.split(TOPIC_SEPARATOR) + .filter { !invalidTopics.contains(it) } + .toSet() + } + } + + inline fun validate(topics: Topics) { + val exceptionStringConstructor = T::class.constructors + .first { it.parameters.size == 1 && it.parameters[0].type.javaType == String::class.java }!!
+ val crossDefinedTopics = topics.allTopics() + .groupBy({ it }, { 1 }) + .filterValues { it.sum() > 1 } + .keys + if (crossDefinedTopics.isNotEmpty()) { + throw exceptionStringConstructor + .call("The following topics are cross defined: $crossDefinedTopics") + } + } + + fun toStrategyMap(topics: Topics, sourceIdStrategyConfig: SourceIdIngestionStrategyConfig): Map { + return topics.asMap() + .filterKeys { it != TopicType.CYPHER } + .mapValues { (type, config) -> + when (type) { + TopicType.CDC_SOURCE_ID -> SourceIdIngestionStrategy(sourceIdStrategyConfig) + TopicType.CDC_SCHEMA -> SchemaIngestionStrategy() + TopicType.CUD -> CUDIngestionStrategy() + TopicType.PATTERN_NODE -> { + val map = config as Map + map.mapValues { NodePatternIngestionStrategy(it.value) } + } + TopicType.PATTERN_RELATIONSHIP -> { + val map = config as Map + map.mapValues { RelationshipPatternIngestionStrategy(it.value) } + } + else -> throw RuntimeException("Unsupported topic type $type") + } + } + } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/service/errors/ErrorService.kt b/extended/src/main/kotlin/apoc/kafka/service/errors/ErrorService.kt new file mode 100644 index 0000000000..07db7eb958 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/service/errors/ErrorService.kt @@ -0,0 +1,125 @@ +package apoc.kafka.service.errors + +import org.apache.avro.generic.GenericRecord +import org.apache.kafka.clients.consumer.ConsumerRecord +import org.apache.kafka.common.record.RecordBatch +import apoc.kafka.extensions.toMap +import apoc.kafka.utils.JSONUtils +import java.util.* + + +data class ErrorData(val originalTopic: String, + val timestamp: Long, + val key: ByteArray?, + val value: ByteArray?, + val partition: String, + val offset: String, + val executingClass: Class<*>?, + val databaseName: String?, + val exception: Exception?) 
{ + + constructor(originalTopic: String, timestamp: Long?, key: Any?, value: Any?, + partition: Int, offset: Long, executingClass: Class<*>?, databaseName: String?, exception: Exception?) : + this(originalTopic, timestamp ?: RecordBatch.NO_TIMESTAMP, toByteArray(key), toByteArray(value), partition.toString(),offset.toString(), executingClass, databaseName, exception) + + companion object { + + fun from(consumerRecord: ConsumerRecord, exception: Exception?, executingClass: Class<*>?, databaseName: String?): ErrorData { + return ErrorData(offset = consumerRecord.offset().toString(), + originalTopic = consumerRecord.topic(), + partition = consumerRecord.partition().toString(), + timestamp = consumerRecord.timestamp(), + exception = exception, + executingClass = executingClass, + key = toByteArray(consumerRecord.key()), + value = toByteArray(consumerRecord.value()), + databaseName = databaseName) + } + + fun toByteArray(v:Any?) = try { + when (v) { + null -> null + is ByteArray -> v + is GenericRecord -> JSONUtils.writeValueAsBytes(mapOf("schema" to v.schema.toMap(), "record" to v.toMap())) + else -> v.toString().toByteArray(Charsets.UTF_8) + } + } catch (e:Exception) { + null + } + } + fun toLogString() = + """ +ErrorData(originalTopic=$originalTopic, timestamp=$timestamp, partition=$partition, offset=$offset, exception=$exception, key=${key?.toString(Charsets.UTF_8)}, value=${value?.sliceArray(0..Math.min(value.size,200)-1)?.toString(Charsets.UTF_8)}, executingClass=$executingClass) + """.trimIndent() + +} + +abstract class ErrorService(private val config: Map = emptyMap()) { + + data class ErrorConfig(val fail:Boolean=false, val log:Boolean=false, val logMessages:Boolean=false, + val dlqTopic:String? = null, val dlqHeaderPrefix:String = "", val dlqHeaders:Boolean = false, val dlqReplication: Int? 
= 3) { + + /* + https://www.confluent.io/blog/kafka-connect-deep-dive-error-handling-dead-letter-queues + "errors.retry.timeout": "-1", + "errors.retry.delay.max.ms": "1000", + + "errors.tolerance": "all", "none" == fail-fast, abort sink task + + fail-fast for configuration errors (e.g. validate cypher statements on start) + errors.tolerance = all -> silently ignore all bad messages + + org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java + + + "errors.log.enable": true, + "errors.deadletterqueue.context.headers.enable"=true/false + "errors.deadletterqueue.topic.name": "test-error-topic", + "errors.deadletterqueue.topic.replication.factor": 1, + "errors.log.include.messages": true, + */ + + companion object { + const val TOLERANCE = "errors.tolerance" + const val LOG = "errors.log.enable" + const val LOG_MESSAGES = "errors.log.include.messages" + const val DLQ_TOPIC = "errors.deadletterqueue.topic.name" + const val DLQ_HEADERS = "errors.deadletterqueue.context.headers.enable" + const val DLQ_HEADER_PREFIX = "errors.deadletterqueue.context.headers.prefix" + const val DLQ_REPLICATION = "errors.deadletterqueue.topic.replication.factor" + + fun from(props: Properties) = from(props.toMap() as Map) + + fun boolean(v:Any?) = when (v) { + null -> false + "true" -> true + "false" -> false + is Boolean -> v + else -> false + } + fun int(v:Any?) 
= when (v) { + null -> 0 + is Int -> v + is String -> v.toInt() + else -> 0 + } + + fun from(config: Map) = + ErrorConfig( + fail = config.getOrDefault(TOLERANCE, "none") == "none", + log = boolean(config.get(LOG)), + logMessages = boolean(config.get(LOG_MESSAGES)), + dlqTopic = config.get(DLQ_TOPIC) as String?, + dlqHeaders = boolean(config.get(DLQ_HEADERS)), + dlqHeaderPrefix = config.getOrDefault(DLQ_HEADER_PREFIX,"") as String, + dlqReplication = int(config.getOrDefault(DLQ_REPLICATION, 3))) + } + } + + abstract fun report(errorDatas: List) + + open fun close() {} +} + +class ProcessingError(val errorDatas: List) : + RuntimeException("Error processing ${errorDatas.size} messages\n"+errorDatas.map { it.toLogString() }.joinToString("\n")) diff --git a/extended/src/main/kotlin/apoc/kafka/service/errors/KafkaErrorService.kt b/extended/src/main/kotlin/apoc/kafka/service/errors/KafkaErrorService.kt new file mode 100644 index 0000000000..864a4deb3e --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/service/errors/KafkaErrorService.kt @@ -0,0 +1,97 @@ +package apoc.kafka.service.errors + +import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.kafka.clients.CommonClientConfigs +import org.apache.kafka.clients.producer.KafkaProducer +import org.apache.kafka.clients.producer.Producer +import org.apache.kafka.clients.producer.ProducerConfig +import org.apache.kafka.clients.producer.ProducerRecord +import org.apache.kafka.common.record.RecordBatch +import org.apache.kafka.common.serialization.ByteArraySerializer +import org.neo4j.util.VisibleForTesting +import apoc.kafka.utils.KafkaUtil.validateConnection +import java.util.* + +class KafkaErrorService(private val producer: Producer?, private val errorConfig: ErrorConfig, private val log: (String, Exception?)->Unit): ErrorService() { + + constructor(config: Properties, errorConfig: ErrorConfig, + log: (String, Exception?) 
-> Unit) : this(producer(errorConfig, config, log), errorConfig, log) + + companion object { + private fun producer(errorConfig: ErrorConfig, config: Properties, log: (String, Exception?) -> Unit) = + errorConfig.dlqTopic?.let { + try { + val bootstrapServers = config.getOrDefault(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, "").toString() + validateConnection(bootstrapServers, CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, false) + config[ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG] = ByteArraySerializer::class.java.name + config[ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG] = ByteArraySerializer::class.java.name + KafkaProducer(config) + } catch (e: Exception) { + log("Cannot initialize the custom DLQ because of the following exception: ", e) + null + } + } + } + + override fun report(errorDatas: List) { + if (errorConfig.fail) throw ProcessingError(errorDatas) + if (errorConfig.log) { + if (errorConfig.logMessages) { + errorDatas.forEach{log(it.toLogString(),it.exception)} + } else { + errorDatas.map { it.exception }.distinct().forEach{log("Error processing ${errorDatas.size} messages",it)} + } + } + + errorDatas.forEach { dlqData -> + producer?.let { + try { + val producerRecord = if (dlqData.timestamp == RecordBatch.NO_TIMESTAMP) { + ProducerRecord(errorConfig.dlqTopic, null, dlqData.key, dlqData.value) + } else { + ProducerRecord(errorConfig.dlqTopic, null, dlqData.timestamp, dlqData.key, dlqData.value) + } + if (errorConfig.dlqHeaders) { + val producerHeader = producerRecord.headers() + populateContextHeaders(dlqData).forEach { (key, value) -> producerHeader.add(key, value) } + } + it.send(producerRecord) + } catch (e: Exception) { + log("Error writing to DLQ $e: ${dlqData.toLogString()}", e) // todo only the first or all + } + } + } + } + + @VisibleForTesting + fun populateContextHeaders(errorData: ErrorData): Map { + fun prefix(suffix: String) = errorConfig.dlqHeaderPrefix + suffix + + val headers = mutableMapOf( + prefix("topic") to 
errorData.originalTopic.toByteArray(), + prefix("partition") to errorData.partition.toByteArray(), + prefix("offset") to errorData.offset.toByteArray()) + + if (!errorData.databaseName.isNullOrBlank()) { + headers[prefix("databaseName")] = errorData.databaseName.toByteArray() + } + + if (errorData.executingClass != null) { + headers[prefix("class.name")] = errorData.executingClass.name.toByteArray() + } + if (errorData.exception != null) { + headers[prefix("exception.class.name")] = errorData.exception.javaClass.name.toByteArray() + if (errorData.exception.message != null) { + headers[prefix("exception.message")] = errorData.exception.message.toString().toByteArray() + } + headers[prefix("exception.stacktrace")] = ExceptionUtils.getStackTrace(errorData.exception).toByteArray() + } + return headers + } + + + override fun close() { + this.producer?.close() + } + +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/CUDIngestionStrategy.kt b/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/CUDIngestionStrategy.kt new file mode 100644 index 0000000000..9e3d294620 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/CUDIngestionStrategy.kt @@ -0,0 +1,282 @@ +package apoc.kafka.service.sink.strategy + +import apoc.kafka.events.EntityType +import apoc.kafka.extensions.quote +import apoc.kafka.service.StreamsSinkEntity +import apoc.kafka.service.sink.strategy.CUDIngestionStrategy.Companion.FROM_KEY +import apoc.kafka.service.sink.strategy.CUDIngestionStrategy.Companion.TO_KEY +import apoc.kafka.utils.JSONUtils +import apoc.kafka.utils.KafkaUtil.getLabelsAsString +import apoc.kafka.utils.KafkaUtil.getNodeKeysAsString +import apoc.kafka.utils.KafkaUtil + + +enum class CUDOperations { create, merge, update, delete, match } + +abstract class CUD { + abstract val op: CUDOperations + abstract val type: EntityType + abstract val properties: Map +} + +data class CUDNode(override val op: 
CUDOperations, + override val properties: Map = emptyMap(), + val ids: Map = emptyMap(), + val detach: Boolean = true, + val labels: List = emptyList()): CUD() { + override val type = EntityType.node + + fun toMap(): Map { + return when (op) { + CUDOperations.delete -> mapOf("ids" to ids) + else -> mapOf("ids" to ids, "properties" to properties) + } + } +} + +data class CUDNodeRel(val ids: Map = emptyMap(), + val labels: List, + val op: CUDOperations = CUDOperations.match) + +data class CUDRelationship(override val op: CUDOperations, + override val properties: Map = emptyMap(), + val rel_type: String, + val from: CUDNodeRel, + val to: CUDNodeRel): CUD() { + override val type = EntityType.relationship + + fun toMap(): Map { + val from = mapOf("ids" to from.ids) + val to = mapOf("ids" to to.ids) + return when (op) { + CUDOperations.delete -> mapOf(FROM_KEY to from, + TO_KEY to to) + else -> mapOf(FROM_KEY to from, + TO_KEY to to, + "properties" to properties) + } + } +} + + +class CUDIngestionStrategy: IngestionStrategy { + + companion object { + @JvmStatic val ID_KEY = "ids" + @JvmStatic val PHYSICAL_ID_KEY = "_id" + @JvmStatic val FROM_KEY = "from" + @JvmStatic val TO_KEY = "to" + + private val LIST_VALID_CUD_NODE_REL = listOf(CUDOperations.merge, CUDOperations.create, CUDOperations.match) + private val LIST_VALID_CUD_REL = listOf(CUDOperations.create, CUDOperations.merge, CUDOperations.update) + } + + data class NodeRelMetadata(val labels: List, val ids: Set, val op: CUDOperations = CUDOperations.match) + + private fun CUDRelationship.isValidOperation(): Boolean = from.op in LIST_VALID_CUD_NODE_REL && to.op in LIST_VALID_CUD_NODE_REL && op in LIST_VALID_CUD_REL + + private fun NodeRelMetadata.getOperation() = op.toString().toUpperCase() + + private fun buildNodeLookupByIds(keyword: String = "MATCH", ids: Set, labels: List, identifier: String = "n", field: String = ""): String { + val fullField = if (field.isNotBlank()) "$field." 
else field + val quotedIdentifier = identifier.quote() + return when (ids.contains(PHYSICAL_ID_KEY)) { + true -> "MATCH ($quotedIdentifier) WHERE id($quotedIdentifier) = event.$fullField$ID_KEY._id" + else -> "$keyword ($quotedIdentifier${getLabelsAsString(labels)} {${getNodeKeysAsString(keys = ids, prefix = "$fullField$ID_KEY")}})" + } + } + + private fun buildNodeCreateStatement(labels: List): String = """ + |${KafkaUtil.UNWIND} + |CREATE (n${getLabelsAsString(labels)}) + |SET n = event.properties + """.trimMargin() + + private fun buildRelCreateStatement(from: NodeRelMetadata, to: NodeRelMetadata, + rel_type: String): String = """ + |${KafkaUtil.UNWIND} + |${buildNodeLookupByIds(keyword = from.getOperation(), ids = from.ids, labels = from.labels, identifier = FROM_KEY, field = FROM_KEY)} + |${KafkaUtil.WITH_EVENT_FROM} + |${buildNodeLookupByIds(keyword = to.getOperation(), ids = to.ids, labels = to.labels, identifier = TO_KEY, field = TO_KEY)} + |CREATE ($FROM_KEY)-[r:${rel_type.quote()}]->($TO_KEY) + |SET r = event.properties + """.trimMargin() + + private fun buildNodeMergeStatement(labels: List, ids: Set): String = """ + |${KafkaUtil.UNWIND} + |${buildNodeLookupByIds(keyword = "MERGE", ids = ids, labels = labels)} + |SET n += event.properties + """.trimMargin() + + private fun buildRelMergeStatement(from: NodeRelMetadata, to: NodeRelMetadata, + rel_type: String): String = """ + |${KafkaUtil.UNWIND} + |${buildNodeLookupByIds(keyword = from.getOperation(), ids = from.ids, labels = from.labels, identifier = FROM_KEY, field = FROM_KEY)} + |${KafkaUtil.WITH_EVENT_FROM} + |${buildNodeLookupByIds(keyword = to.getOperation(), ids = to.ids, labels = to.labels, identifier = TO_KEY, field = TO_KEY)} + |MERGE ($FROM_KEY)-[r:${rel_type.quote()}]->($TO_KEY) + |SET r += event.properties + """.trimMargin() + + private fun buildNodeUpdateStatement(labels: List, ids: Set): String = """ + |${KafkaUtil.UNWIND} + |${buildNodeLookupByIds(ids = ids, labels = labels)} + |SET n += 
event.properties + """.trimMargin() + + private fun buildRelUpdateStatement(from: NodeRelMetadata, to: NodeRelMetadata, + rel_type: String): String = """ + |${KafkaUtil.UNWIND} + |${buildNodeLookupByIds(ids = from.ids, labels = from.labels, identifier = FROM_KEY, field = FROM_KEY)} + |${buildNodeLookupByIds(ids = to.ids, labels = to.labels, identifier = TO_KEY, field = TO_KEY)} + |MATCH ($FROM_KEY)-[r:${rel_type.quote()}]->($TO_KEY) + |SET r += event.properties + """.trimMargin() + + private fun buildDeleteStatement(labels: List, ids: Set, detach: Boolean): String = """ + |${KafkaUtil.UNWIND} + |${buildNodeLookupByIds(ids = ids, labels = labels)} + |${if (detach) "DETACH " else ""}DELETE n + """.trimMargin() + + private fun buildRelDeleteStatement(from: NodeRelMetadata, to: NodeRelMetadata, + rel_type: String): String = """ + |${KafkaUtil.UNWIND} + |${buildNodeLookupByIds(ids = from.ids, labels = from.labels, identifier = FROM_KEY, field = FROM_KEY)} + |${buildNodeLookupByIds(ids = to.ids, labels = to.labels, identifier = TO_KEY, field = TO_KEY)} + |MATCH ($FROM_KEY)-[r:${rel_type.quote()}]->($TO_KEY) + |DELETE r + """.trimMargin() + + private inline fun toCUDEntity(it: Any): T? 
{ + return when (it) { + is T -> it + is Map<*, *> -> { + val type = it["type"]?.toString() + val entityType = if (type == null) null else EntityType.valueOf(type) + when { + entityType == null -> throw RuntimeException("No `type` field found") + entityType != null && EntityType.node == entityType && T::class.java != CUDNode::class.java -> null + entityType != null && EntityType.relationship == entityType && T::class.java != CUDRelationship::class.java -> null + else -> JSONUtils.convertValue(it) + } + } + else -> null + } + } + + private fun getLabels(relNode: CUDNodeRel) = if (relNode.ids.containsKey(PHYSICAL_ID_KEY)) emptyList() else relNode.labels + private fun getLabels(node: CUDNode) = if (node.ids.containsKey(PHYSICAL_ID_KEY)) emptyList() else node.labels + + override fun mergeNodeEvents(events: Collection): List { + val data = events + .mapNotNull { + it.value?.let { + try { + val data = toCUDEntity(it) + when (data?.op) { + CUDOperations.merge -> if (data.ids.isNotEmpty() && data.properties.isNotEmpty()) data else null // TODO send to the DLQ the null + CUDOperations.update, CUDOperations.create -> if (data.properties.isNotEmpty()) data else null // TODO send to the DLQ the null + else -> null + } + } catch (e: Exception) { + null + } + } + } + .groupBy({ it.op }, { it }) + + val create = data[CUDOperations.create] + .orEmpty() + .groupBy { getLabels(it) } + .map { QueryEvents(buildNodeCreateStatement(it.key), it.value.map { it.toMap() }) } + val merge = data[CUDOperations.merge] + .orEmpty() + .groupBy { getLabels(it) to it.ids.keys } + .map { QueryEvents(buildNodeMergeStatement(it.key.first, it.key.second), it.value.map { it.toMap() }) } + val update = data[CUDOperations.update] + .orEmpty() + .groupBy { getLabels(it) to it.ids.keys } + .map { QueryEvents(buildNodeUpdateStatement(it.key.first, it.key.second), it.value.map { it.toMap() }) } + return (create + merge + update) // we'll group the data because of in case of `_id` key is present the generated 
queries are the same for update/merge + .map { it.query to it.events } + .groupBy({ it.first }, { it.second }) + .map { QueryEvents(it.key, it.value.flatten()) } + } + + override fun deleteNodeEvents(events: Collection): List { + return events + .mapNotNull { + it.value?.let { + try { + val data = toCUDEntity(it) + when (data?.op) { + CUDOperations.delete -> if (data.ids.isNotEmpty() && data.properties.isEmpty()) data else null // TODO send to the DLQ the null + else -> null // TODO send to the DLQ the null + } + } catch (e: Exception) { + null + } + } + } + .groupBy { Triple(it.labels, it.ids.keys, it.detach) } + .map { + val (labels, keys, detach) = it.key + QueryEvents(buildDeleteStatement(labels, keys, detach), it.value.map { it.toMap() }) + } + } + + override fun mergeRelationshipEvents(events: Collection): List { + val data = events + .mapNotNull { + it.value?.let { + try { + val data = toCUDEntity(it) + when { + data!!.isValidOperation() -> if (data.from.ids.isNotEmpty() && data.to.ids.isNotEmpty()) data else null // TODO send to the DLQ the null + else -> null // TODO send to the DLQ the null + } + } catch (e: Exception) { + null + } + } + } + .groupBy({ it.op }, { it }) + + return data.flatMap { (op, list) -> + list.groupBy { Triple(NodeRelMetadata(getLabels(it.from), it.from.ids.keys, it.from.op), NodeRelMetadata(getLabels(it.to), it.to.ids.keys, it.to.op), it.rel_type) } + .map { + val (from, to, rel_type) = it.key + val query = when (op) { + CUDOperations.create -> buildRelCreateStatement(from, to, rel_type) + CUDOperations.merge -> buildRelMergeStatement(from, to, rel_type) + else -> buildRelUpdateStatement(from, to, rel_type) + } + QueryEvents(query, it.value.map { it.toMap() }) + } + } + } + + override fun deleteRelationshipEvents(events: Collection): List { + return events + .mapNotNull { + it.value?.let { + try { + val data = toCUDEntity(it) + when (data?.op) { + CUDOperations.delete -> if (data.from.ids.isNotEmpty() && data.to.ids.isNotEmpty()) 
data else null // TODO send to the DLQ the null + else -> null // TODO send to the DLQ the null + } + } catch (e: Exception) { + null + } + } + } + .groupBy { Triple(NodeRelMetadata(getLabels(it.from), it.from.ids.keys), NodeRelMetadata(getLabels(it.to), it.to.ids.keys), it.rel_type) } + .map { + val (from, to, rel_type) = it.key + QueryEvents(buildRelDeleteStatement(from, to, rel_type), it.value.map { it.toMap() }) + } + } + +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/CypherTemplateStrategy.kt b/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/CypherTemplateStrategy.kt new file mode 100644 index 0000000000..7183cd3f6e --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/CypherTemplateStrategy.kt @@ -0,0 +1,18 @@ +package apoc.kafka.service.sink.strategy + +import apoc.kafka.service.StreamsSinkEntity +import apoc.kafka.utils.KafkaUtil + +class CypherTemplateStrategy(query: String): IngestionStrategy { + private val fullQuery = "${KafkaUtil.UNWIND} $query" + override fun mergeNodeEvents(events: Collection): List { + return listOf(QueryEvents(fullQuery, events.mapNotNull { it.value as? 
Map })) + } + + override fun deleteNodeEvents(events: Collection): List = emptyList() + + override fun mergeRelationshipEvents(events: Collection): List = emptyList() + + override fun deleteRelationshipEvents(events: Collection): List = emptyList() + +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/IngestionStrategy.kt b/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/IngestionStrategy.kt new file mode 100644 index 0000000000..714406baf6 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/IngestionStrategy.kt @@ -0,0 +1,37 @@ +package apoc.kafka.service.sink.strategy + +import apoc.kafka.events.Constraint +import apoc.kafka.events.RelationshipPayload +import apoc.kafka.service.StreamsSinkEntity + + +/** A query string together with the list of event entries that belong to it. */ data class QueryEvents(val query: String, val events: List>) + +/** Contract for turning a collection of sink entities into [QueryEvents] batches, with one method each for node merges, node deletes, relationship merges and relationship deletes. */ interface IngestionStrategy { + fun mergeNodeEvents(events: Collection): List + fun deleteNodeEvents(events: Collection): List + fun mergeRelationshipEvents(events: Collection): List + fun deleteRelationshipEvents(events: Collection): List +} + +/** Relationship label plus the labels and key names of its start and end nodes; the secondary constructor reads them from a [RelationshipPayload], treating missing label lists as empty. */ data class RelationshipSchemaMetadata(val label: String, + val startLabels: List, + val endLabels: List, + val startKeys: Set, + val endKeys: Set) { + constructor(payload: RelationshipPayload) : this(label = payload.label, + startLabels = payload.start.labels.orEmpty(), + endLabels = payload.end.labels.orEmpty(), + startKeys = payload.start.ids.keys, + endKeys = payload.end.ids.keys) +} + +data class NodeSchemaMetadata(val constraints: List, + val labelsToAdd: List, + val labelsToDelete: List, + val keys: Set) + + + +data class NodeMergeMetadata(val labelsToAdd: Set, + val labelsToDelete: Set) \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/NodePatternIngestionStrategy.kt b/extended/src/main/kotlin/apoc/kafka/service/sink/strategy/NodePatternIngestionStrategy.kt new file mode 100644 index 0000000000..b22bdf8080 --- /dev/null +++
/**
 * [IngestionStrategy] for node patterns.
 *
 * Merge events are read from the entity value, delete events from the entity
 * key of entities whose value is null. Relationship events are not handled
 * and always yield an empty list.
 */
class NodePatternIngestionStrategy(private val nodePatternConfiguration: NodePatternConfiguration): IngestionStrategy {

    private val mergeNodeTemplate: String = """
        |${KafkaUtil.UNWIND}
        |MERGE (n${getLabelsAsString(nodePatternConfiguration.labels)}{${getNodeMergeKeys("keys", nodePatternConfiguration.keys)}})
        |SET n = event.properties
        |SET n += event.keys
    """.trimMargin()

    private val deleteNodeTemplate: String = """
        |${KafkaUtil.UNWIND}
        |MATCH (n${getLabelsAsString(nodePatternConfiguration.labels)}{${getNodeMergeKeys("keys", nodePatternConfiguration.keys)}})
        |DETACH DELETE n
    """.trimMargin()

    /**
     * Builds a single merge query for every entity that has a value containing
     * all configured keys; returns an empty list when no entity qualifies.
     */
    override fun mergeNodeEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        val data = events
            .mapNotNull { entity -> entity.value?.let { JSONUtils.asMap(it) } }
            .mapNotNull { toData(nodePatternConfiguration, it) }
        return if (data.isEmpty()) emptyList() else listOf(QueryEvents(mergeNodeTemplate, data))
    }

    /**
     * Builds a single delete query for every entity with a null value and a
     * key containing all configured keys; returns an empty list otherwise.
     */
    override fun deleteNodeEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        val data = events
            .filter { it.value == null }
            // a null key is dropped here, so no separate key != null filter is needed
            .mapNotNull { entity -> entity.key?.let { JSONUtils.asMap(it) } }
            .mapNotNull { toData(nodePatternConfiguration, it, false) }
        return if (data.isEmpty()) emptyList() else listOf(QueryEvents(deleteNodeTemplate, data))
    }

    override fun mergeRelationshipEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> = emptyList()

    override fun deleteRelationshipEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> = emptyList()

    companion object {
        /**
         * Splits the flattened [props] into the configured keys and, optionally,
         * the remaining properties selected by the pattern type.
         *
         * @param nodePatternConfiguration the pattern supplying keys, type and property list
         * @param props the raw event map; it is flattened before any lookup
         * @param withProperties when false only the "keys" entry is produced
         * @return a map with "keys" (and "properties" when requested), or null
         *         when at least one configured key is missing from the event
         */
        fun toData(nodePatternConfiguration: NodePatternConfiguration, props: Map<String, Any?>, withProperties: Boolean = true): Map<String, Map<String, Any?>>? {
            val properties = props.flatten()
            val keyNames = nodePatternConfiguration.keys
            if (!keyNames.all { properties.containsKey(it) }) {
                return null
            }
            val keys = properties.filterKeys { keyNames.contains(it) }
            if (!withProperties) {
                // delete path: the property map would be thrown away, so it is not computed
                return mapOf("keys" to keys)
            }
            val filteredProperties = when (nodePatternConfiguration.type) {
                PatternConfigurationType.ALL -> properties.filterKeys { !keyNames.contains(it) }
                PatternConfigurationType.EXCLUDE -> properties.filterKeys { key ->
                    val containsProp = containsProp(key, nodePatternConfiguration.properties)
                    !keyNames.contains(key) && !containsProp
                }
                PatternConfigurationType.INCLUDE -> properties.filterKeys { key ->
                    val containsProp = containsProp(key, nodePatternConfiguration.properties)
                    !keyNames.contains(key) && containsProp
                }
            }
            return mapOf("keys" to keys, "properties" to filteredProperties)
        }
    }

}
private const val MINUS_PREFIX = "-"
private const val LABEL_SEPARATOR = ":"
private const val PROPERTIES_SEPARATOR = ","

/**
 * Derives the pattern type from the first property token: no tokens or a
 * leading `*` means ALL, a leading `-` means EXCLUDE, anything else INCLUDE.
 */
private fun getPatternConfiguredType(properties: List<String>): PatternConfigurationType {
    if (properties.isEmpty()) {
        return PatternConfigurationType.ALL
    }
    // firstOrNull: a blank token falls through to INCLUDE and is then rejected
    // by isHomogeneousPattern instead of raising an index error here
    return when (properties[0].trim().firstOrNull()) {
        '*' -> PatternConfigurationType.ALL
        '-' -> PatternConfigurationType.EXCLUDE
        else -> PatternConfigurationType.INCLUDE
    }
}

/**
 * Verifies that every property token agrees with [type].
 *
 * @throws IllegalArgumentException when the tokens mix inclusion, exclusion or wildcard forms
 */
private fun isHomogeneousPattern(type: PatternConfigurationType, properties: List<String>, pattern: String, entityType: String) {
    // tokens keep the whitespace that surrounded the separators, so compare trimmed values
    val tokens = properties.map { it.trim() }
    val isHomogeneous = when (type) {
        PatternConfigurationType.INCLUDE -> tokens.all { it.firstOrNull()?.isJavaIdentifierStart() == true }
        PatternConfigurationType.EXCLUDE -> tokens.all { it.startsWith(MINUS_PREFIX) }
        PatternConfigurationType.ALL -> tokens.isEmpty() || tokens == listOf("*")
    }
    if (!isHomogeneous) {
        throw IllegalArgumentException("The $entityType pattern $pattern is not homogeneous")
    }
}

/**
 * Normalises the property tokens: trimmed names for INCLUDE, trimmed names
 * without the leading `-` for EXCLUDE, and no names at all for ALL.
 */
private fun cleanProperties(type: PatternConfigurationType, properties: List<String>): List<String> {
    return when (type) {
        PatternConfigurationType.INCLUDE -> properties.map { it.trim() }
        // only the exclusion marker is removed, never a character inside the name
        PatternConfigurationType.EXCLUDE -> properties.map { it.trim().removePrefix(MINUS_PREFIX) }
        PatternConfigurationType.ALL -> emptyList()
    }
}

interface PatternConfiguration

/**
 * Parsed form of a node pattern.
 *
 * @property keys names that were prefixed with `!` in the pattern
 * @property type how [properties] has to be interpreted
 * @property labels quoted labels of the node
 * @property properties cleaned property names (empty for ALL)
 */
data class NodePatternConfiguration(val keys: Set<String>, val type: PatternConfigurationType,
                                    val labels: List<String>, val properties: List<String>): PatternConfiguration {
    companion object {

        // (:LabelA{!id,foo,bar})
        @JvmStatic private val cypherNodePatternConfigured = """\((:\w+\s*(?::\s*(?:\w+)\s*)*)\s*(?:\{\s*(-?[\w!\.]+\s*(?:,\s*-?[!\w\*\.]+\s*)*)\})?\)$""".toRegex()
        // LabelA{!id,foo,bar}
        @JvmStatic private val simpleNodePatternConfigured = """^(\w+\s*(?::\s*(?:\w+)\s*)*)\s*(?:\{\s*(-?[\w!\.]+\s*(?:,\s*-?[!\w\*\.]+\s*)*)\})?$""".toRegex()

        /**
         * Parses a node pattern in Cypher form (`(:LabelA{!id,foo,bar})`) or
         * simple form (`LabelA{!id,foo,bar}`).
         *
         * @throws IllegalArgumentException when the pattern does not match,
         *         declares no key, or mixes property forms
         */
        fun parse(pattern: String): NodePatternConfiguration {
            val isCypherPattern = pattern.startsWith("(")
            val regex = if (isCypherPattern) cypherNodePatternConfigured else simpleNodePatternConfigured
            val matcher = regex.matchEntire(pattern)
                ?: throw IllegalArgumentException("The Node pattern $pattern is invalid")

            val labels = matcher.groupValues[1]
                .split(LABEL_SEPARATOR)
                .let { if (isCypherPattern) it.drop(1) else it }
                // the regex captures whitespace around labels; trim so it is not quoted into the label
                .map { it.trim().quote() }

            // trim first: tokens after a ", " separator start with a blank, which
            // previously hid the key marker and made such patterns fail
            val allProperties = matcher.groupValues[2]
                .split(PROPERTIES_SEPARATOR)
                .map { it.trim() }
            val (keyTokens, properties) = allProperties.partition { it.startsWith(ID_PREFIX) }
            val keys = keyTokens.map { it.substring(1) }.toSet()
            if (keys.isEmpty()) {
                throw IllegalArgumentException("The Node pattern $pattern must contains at lest one key")
            }

            val type = getPatternConfiguredType(properties)
            isHomogeneousPattern(type, properties, pattern, "Node")
            val cleanedProperties = cleanProperties(type, properties)

            return NodePatternConfiguration(keys = keys, type = type,
                    labels = labels, properties = cleanedProperties)
        }
    }
}


/**
 * Parsed form of a relationship pattern.
 *
 * @property start configuration of the start node
 * @property end configuration of the end node
 * @property relType relationship type as written in the pattern
 * @property type how [properties] has to be interpreted
 * @property properties cleaned relationship property names (empty for ALL)
 */
data class RelationshipPatternConfiguration(val start: NodePatternConfiguration, val end: NodePatternConfiguration,
                                            val relType: String, val type: PatternConfigurationType,
                                            val properties: List<String>): PatternConfiguration {
    companion object {

        // we don't allow ALL for start/end nodes in rels
        // it's public for testing purpose
        fun getNodeConf(pattern: String): NodePatternConfiguration {
            val parsed = NodePatternConfiguration.parse(pattern)
            return if (parsed.type == PatternConfigurationType.ALL) {
                NodePatternConfiguration(keys = parsed.keys, type = PatternConfigurationType.INCLUDE,
                        labels = parsed.labels, properties = parsed.properties)
            } else {
                parsed
            }
        }

        // (:Source{!id})-[:REL_TYPE{foo, -bar}]->(:Target{!targetId})
        private val cypherRelationshipPatternConfigured = """^\(:(.*?)\)(<)?-\[(?::)([\w\_]+)(\{\s*(-?[\w\*\.]+\s*(?:,\s*-?[\w\*\.]+\s*)*)\})?\]-(>)?\(:(.*?)\)$""".toRegex()
        // LabelA{!id} REL_TYPE{foo, -bar} LabelB{!targetId}
        private val simpleRelationshipPatternConfigured = """^(.*?) ([\w\_]+)(\{\s*(-?[\w\*\.]+\s*(?:,\s*-?[\w\*\.]+\s*)*)\})? (.*?)$""".toRegex()

        /**
         * Raw pieces of a relationship pattern, extracted from the regex groups
         * before the node patterns are parsed.
         */
        data class RelationshipPatternMetaData(val startPattern: String, val endPattern: String, val relType: String, val properties: List<String>) {
            companion object {

                private fun toProperties(propGroup: String): List<String> =
                    if (propGroup.isBlank()) emptyList() else propGroup.split(PROPERTIES_SEPARATOR)

                /**
                 * Picks the node patterns, type and properties out of [groupValues].
                 *
                 * @param isCypherPattern selects the group layout of the Cypher regex instead of the simple one
                 * @param isLeftToRight when false the two node patterns are swapped
                 */
                fun create(isCypherPattern: Boolean, isLeftToRight: Boolean, groupValues: List<String>): RelationshipPatternMetaData {
                    // group numbers differ between the two regexes
                    val leftNode = 1
                    val rightNode = if (isCypherPattern) 7 else 5
                    val relTypeGroup = if (isCypherPattern) 3 else 2
                    val propsGroup = if (isCypherPattern) 5 else 4

                    val startGroup = if (isLeftToRight) leftNode else rightNode
                    val endGroup = if (isLeftToRight) rightNode else leftNode

                    return RelationshipPatternMetaData(startPattern = groupValues[startGroup],
                            endPattern = groupValues[endGroup], relType = groupValues[relTypeGroup],
                            properties = toProperties(groupValues[propsGroup]))
                }
            }
        }

        /**
         * Parses a relationship pattern in Cypher form
         * (`(:Source{!id})-[:REL_TYPE{foo}]->(:Target{!targetId})`) or simple
         * form (`LabelA{!id} REL_TYPE{foo} LabelB{!targetId}`).
         *
         * @throws IllegalArgumentException when the pattern or one of its node
         *         patterns is invalid, the direction is invalid, or the
         *         relationship properties mix forms
         */
        fun parse(pattern: String): RelationshipPatternConfiguration {
            val isCypherPattern = pattern.startsWith("(")
            val regex = if (isCypherPattern) {
                cypherRelationshipPatternConfigured
            } else {
                simpleRelationshipPatternConfigured
            }
            val matcher = regex.matchEntire(pattern)
                ?: throw IllegalArgumentException("The Relationship pattern $pattern is invalid")

            val isLeftToRight = (!isCypherPattern || isUndirected(matcher) || isDirectedToRight(matcher))
            val isRightToLeft = if (isCypherPattern) isDirectedToLeft(matcher) else false

            if (!isLeftToRight && !isRightToLeft) {
                throw IllegalArgumentException("The Relationship pattern $pattern has an invalid direction")
            }

            val metadata = RelationshipPatternMetaData.create(isCypherPattern, isLeftToRight, matcher.groupValues)

            val start = parseEndpoint(metadata.startPattern, pattern)
            val end = parseEndpoint(metadata.endPattern, pattern)
            val type = getPatternConfiguredType(metadata.properties)
            isHomogeneousPattern(type, metadata.properties, pattern, "Relationship")
            val cleanedProperties = cleanProperties(type, metadata.properties)
            return RelationshipPatternConfiguration(start = start, end = end, relType = metadata.relType,
                    properties = cleanedProperties, type = type)
        }

        /**
         * Parses one endpoint and reports failures against the whole
         * relationship [pattern], keeping the original failure as the cause.
         */
        private fun parseEndpoint(nodePattern: String, pattern: String): NodePatternConfiguration =
            try {
                getNodeConf(nodePattern)
            } catch (e: Exception) {
                throw IllegalArgumentException("The Relationship pattern $pattern is invalid", e)
            }

        private fun isDirectedToLeft(matcher: MatchResult) =
                (matcher.groupValues[2] == "<" && matcher.groupValues[6] == "")

        private fun isDirectedToRight(matcher: MatchResult) =
                (matcher.groupValues[2] == "" && matcher.groupValues[6] == ">")

        private fun isUndirected(matcher: MatchResult) =
                (matcher.groupValues[2] == "" && matcher.groupValues[6] == "")
    }
}
/**
 * [IngestionStrategy] for relationship patterns.
 *
 * Merge events are read from the entity value, delete events from the entity
 * key of entities whose value is null. Node events are not handled and always
 * yield an empty list.
 */
class RelationshipPatternIngestionStrategy(private val relationshipPatternConfiguration: RelationshipPatternConfiguration): IngestionStrategy {

    private val mergeRelationshipTemplate: String = """
        |${KafkaUtil.UNWIND}
        |MERGE (start${getLabelsAsString(relationshipPatternConfiguration.start.labels)}{${getNodeMergeKeys("start.keys", relationshipPatternConfiguration.start.keys)}})
        |SET start = event.start.properties
        |SET start += event.start.keys
        |MERGE (end${getLabelsAsString(relationshipPatternConfiguration.end.labels)}{${getNodeMergeKeys("end.keys", relationshipPatternConfiguration.end.keys)}})
        |SET end = event.end.properties
        |SET end += event.end.keys
        |MERGE (start)-[r:${relationshipPatternConfiguration.relType}]->(end)
        |SET r = event.properties
    """.trimMargin()

    private val deleteRelationshipTemplate: String = """
        |${KafkaUtil.UNWIND}
        |MATCH (start${getLabelsAsString(relationshipPatternConfiguration.start.labels)}{${getNodeMergeKeys("start.keys", relationshipPatternConfiguration.start.keys)}})
        |MATCH (end${getLabelsAsString(relationshipPatternConfiguration.end.labels)}{${getNodeMergeKeys("end.keys", relationshipPatternConfiguration.end.keys)}})
        |MATCH (start)-[r:${relationshipPatternConfiguration.relType}]->(end)
        |DELETE r
    """.trimMargin()

    override fun mergeNodeEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> = emptyList()

    override fun deleteNodeEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> = emptyList()

    /**
     * Builds a single merge query from every entity whose value contains all
     * start and end keys; returns an empty list when no entity qualifies.
     */
    override fun mergeRelationshipEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        val data = events
            .mapNotNull { entity -> entity.value?.let { JSONUtils.asMap(it) } }
            .mapNotNull { props -> toMergeData(props) }
        return if (data.isEmpty()) emptyList() else listOf(QueryEvents(mergeRelationshipTemplate, data))
    }

    /**
     * Builds a single delete query from every entity with a null value and a
     * key containing all start and end keys; returns an empty list otherwise.
     */
    override fun deleteRelationshipEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        val data = events
            .filter { it.value == null }
            .mapNotNull { entity -> entity.key?.let { JSONUtils.asMap(it) } }
            .mapNotNull { props -> toDeleteData(props) }
        return if (data.isEmpty()) emptyList() else listOf(QueryEvents(deleteRelationshipTemplate, data))
    }

    /** True when the flattened event carries every key of both endpoints. */
    private fun hasEndpointKeys(flattened: Map<String, Any?>): Boolean =
        relationshipPatternConfiguration.start.keys.all { flattened.containsKey(it) }
                && relationshipPatternConfiguration.end.keys.all { flattened.containsKey(it) }

    /** Event map for the merge template, or null when an endpoint key is missing. */
    private fun toMergeData(props: Map<String, Any?>): Map<String, Any?>? {
        val flattened = props.flatten()
        if (!hasEndpointKeys(flattened)) {
            return null
        }
        val relationshipProperties = when (relationshipPatternConfiguration.type) {
            PatternConfigurationType.ALL -> flattened.filterKeys { isRelationshipProperty(it) }
            PatternConfigurationType.EXCLUDE -> flattened.filterKeys { name ->
                val listed = containsProp(name, relationshipPatternConfiguration.properties)
                isRelationshipProperty(name) && !listed
            }
            PatternConfigurationType.INCLUDE -> flattened.filterKeys { name ->
                val listed = containsProp(name, relationshipPatternConfiguration.properties)
                isRelationshipProperty(name) && listed
            }
        }
        val startData = NodePatternIngestionStrategy.toData(relationshipPatternConfiguration.start, props)
        val endData = NodePatternIngestionStrategy.toData(relationshipPatternConfiguration.end, props)
        return mapOf("start" to startData, "end" to endData, "properties" to relationshipProperties)
    }

    /** Event map for the delete template, or null when an endpoint key is missing. */
    private fun toDeleteData(props: Map<String, Any?>): Map<String, Any?>? {
        if (!hasEndpointKeys(props.flatten())) {
            return null
        }
        // NOTE(review): the endpoint maps also carry "properties" here although the
        // delete template only reads the keys - confirm whether that is intended
        val startData = NodePatternIngestionStrategy.toData(relationshipPatternConfiguration.start, props)
        val endData = NodePatternIngestionStrategy.toData(relationshipPatternConfiguration.end, props)
        return mapOf("start" to startData, "end" to endData)
    }

    /** A name belongs to the relationship when neither endpoint lists it as key or property. */
    private fun isRelationshipProperty(propertyName: String): Boolean {
        val startConf = relationshipPatternConfiguration.start
        val endConf = relationshipPatternConfiguration.end
        return !(startConf.keys.contains(propertyName)
                || startConf.properties.contains(propertyName)
                || endConf.keys.contains(propertyName)
                || endConf.properties.contains(propertyName))
    }

}

/**
 * [IngestionStrategy] that relies on the UNIQUE constraints carried by each
 * transaction event to decide which labels and properties identify an entity.
 * Events without usable constraints or keys are skipped.
 */
class SchemaIngestionStrategy: IngestionStrategy {

    /**
     * Groups relationship events by their [RelationshipSchemaMetadata].
     * Events are dropped when either endpoint has no UNIQUE constraint on one
     * of its labels or ends up without key values.
     */
    private fun prepareRelationshipEvents(events: List<StreamsTransactionEvent>, withProperties: Boolean = true): Map<RelationshipSchemaMetadata, List<Map<String, Any>>> =
        events
            .mapNotNull { event ->
                val payload = event.payload as RelationshipPayload

                val startNodeConstraints = getNodeConstraints(event) { constraint ->
                    constraint.type == StreamsConstraintType.UNIQUE && payload.start.labels.orEmpty().contains(constraint.label)
                }
                val endNodeConstraints = getNodeConstraints(event) { constraint ->
                    constraint.type == StreamsConstraintType.UNIQUE && payload.end.labels.orEmpty().contains(constraint.label)
                }

                if (constraintsAreEmpty(startNodeConstraints, endNodeConstraints)) {
                    null
                } else {
                    createRelationshipMetadata(payload, startNodeConstraints, endNodeConstraints, withProperties)
                }
            }
            .groupBy({ it.first }, { it.second })

    /**
     * Builds the grouping key and the event map for one relationship payload,
     * or returns null when one endpoint has no key values left.
     */
    private fun createRelationshipMetadata(payload: RelationshipPayload, startNodeConstraints: List<Constraint>, endNodeConstraints: List<Constraint>, withProperties: Boolean): Pair<RelationshipSchemaMetadata, Map<String, Map<String, Any>>>? {
        val startNodeKeys = getNodeKeys(
                labels = payload.start.labels.orEmpty(),
                propertyKeys = payload.start.ids.keys,
                constraints = startNodeConstraints)
        val endNodeKeys = getNodeKeys(
                labels = payload.end.labels.orEmpty(),
                propertyKeys = payload.end.ids.keys,
                constraints = endNodeConstraints)
        val start = payload.start.ids.filterKeys { startNodeKeys.contains(it) }
        val end = payload.end.ids.filterKeys { endNodeKeys.contains(it) }

        if (idsAreEmpty(start, end)) {
            return null
        }

        val value = if (withProperties) {
            val properties = payload.after?.properties ?: payload.before?.properties ?: emptyMap()
            mapOf("start" to start, "end" to end, "properties" to properties)
        } else {
            mapOf("start" to start, "end" to end)
        }
        val key = RelationshipSchemaMetadata(
                label = payload.label,
                startLabels = payload.start.labels.orEmpty().filter { label -> startNodeConstraints.any { it.label == label } },
                endLabels = payload.end.labels.orEmpty().filter { label -> endNodeConstraints.any { it.label == label } },
                startKeys = start.keys,
                endKeys = end.keys
        )
        return key to value
    }

    private fun idsAreEmpty(start: Map<String, Any>, end: Map<String, Any>) =
            start.isEmpty() || end.isEmpty()

    private fun constraintsAreEmpty(startNodeConstraints: List<Constraint>, endNodeConstraints: List<Constraint>) =
            startNodeConstraints.isEmpty() || endNodeConstraints.isEmpty()

    /** One MERGE query per distinct relationship metadata, for non-delete relationship events. */
    override fun mergeRelationshipEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        val relationshipEvents = events.mapNotNull { entity ->
            toStreamsTransactionEvent(entity) {
                it.payload.type == EntityType.relationship && it.meta.operation != OperationType.deleted
            }
        }
        return prepareRelationshipEvents(relationshipEvents)
            .map { (metadata, batch) ->
                val label = metadata.label.quote()
                val query = """
                    |${KafkaUtil.UNWIND}
                    |MERGE (start${getLabelsAsString(metadata.startLabels)}{${getNodeKeysAsString("start", metadata.startKeys)}})
                    |MERGE (end${getLabelsAsString(metadata.endLabels)}{${getNodeKeysAsString("end", metadata.endKeys)}})
                    |MERGE (start)-[r:$label]->(end)
                    |SET r = event.properties
                """.trimMargin()
                QueryEvents(query, batch)
            }
    }

    /** One DELETE query per distinct relationship metadata, for deleted relationship events. */
    override fun deleteRelationshipEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        val relationshipEvents = events.mapNotNull { entity ->
            toStreamsTransactionEvent(entity) {
                it.payload.type == EntityType.relationship && it.meta.operation == OperationType.deleted
            }
        }
        return prepareRelationshipEvents(relationshipEvents, false)
            .map { (metadata, batch) ->
                val label = metadata.label.quote()
                val query = """
                    |${KafkaUtil.UNWIND}
                    |MATCH (start${getLabelsAsString(metadata.startLabels)}{${getNodeKeysAsString("start", metadata.startKeys)}})
                    |MATCH (end${getLabelsAsString(metadata.endLabels)}{${getNodeKeysAsString("end", metadata.endKeys)}})
                    |MATCH (start)-[r:$label]->(end)
                    |DELETE r
                """.trimMargin()
                QueryEvents(query, batch)
            }
    }

    /** One DETACH DELETE query per distinct constraint list, for deleted node events. */
    override fun deleteNodeEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        return events
            .mapNotNull { entity ->
                toStreamsTransactionEvent(entity) { it.payload.type == EntityType.node && it.meta.operation == OperationType.deleted }
            }
            .mapNotNull { event ->
                val changeEvtBefore = event.payload.before as NodeChange
                val constraints = getNodeConstraints(event) { it.type == StreamsConstraintType.UNIQUE }
                if (constraints.isEmpty()) {
                    null
                } else {
                    constraints to mapOf("properties" to changeEvtBefore.properties)
                }
            }
            .groupBy({ it.first }, { it.second })
            .map { (constraints, batch) ->
                val labels = constraints.mapNotNull { it.label }
                val nodeKeys = constraints.flatMap { it.properties }.toSet()
                val query = """
                    |${KafkaUtil.UNWIND}
                    |MATCH (n${getLabelsAsString(labels)}{${getNodeKeysAsString(keys = nodeKeys)}})
                    |DETACH DELETE n
                """.trimMargin()
                QueryEvents(query, batch)
            }
    }

    /**
     * One MERGE query per distinct [NodeSchemaMetadata], for non-delete node
     * events; label additions are appended before label removals.
     */
    override fun mergeNodeEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        // keeps only the labels that are not covered by a constraint, quoted
        val filterLabels: (List<String>, List<Constraint>) -> List<String> = { labels, constraints ->
            labels
                .filter { label -> constraints.none { constraint -> constraint.label == label } }
                .map { it.quote() }
        }
        return events
            .mapNotNull { entity ->
                toStreamsTransactionEvent(entity) { it.payload.type == EntityType.node && it.meta.operation != OperationType.deleted }
            }
            .mapNotNull { event ->
                val changeEvtAfter = event.payload.after as NodeChange
                val labelsAfter = changeEvtAfter.labels ?: emptyList()
                val labelsBefore = (event.payload.before as? NodeChange)?.labels.orEmpty()

                val constraints = getNodeConstraints(event) { it.type == StreamsConstraintType.UNIQUE }
                if (constraints.isEmpty()) {
                    return@mapNotNull null
                }

                val labelsToAdd = filterLabels((labelsAfter - labelsBefore), constraints)
                val labelsToDelete = filterLabels((labelsBefore - labelsAfter), constraints)

                val propertyKeys = changeEvtAfter.properties?.keys ?: emptySet()
                val keys = getNodeKeys(labelsAfter, propertyKeys, constraints)
                if (keys.isEmpty()) {
                    return@mapNotNull null
                }

                val key = NodeSchemaMetadata(constraints = constraints,
                        labelsToAdd = labelsToAdd, labelsToDelete = labelsToDelete,
                        keys = keys)
                key to mapOf("properties" to changeEvtAfter.properties)
            }
            .groupBy({ it.first }, { it.second })
            .map { (metadata, batch) ->
                val base = """
                    |${KafkaUtil.UNWIND}
                    |MERGE (n${getLabelsAsString(metadata.constraints.mapNotNull { it.label })}{${getNodeKeysAsString(keys = metadata.keys)}})
                    |SET n = event.properties
                """.trimMargin()
                val query = buildString {
                    append(base)
                    if (metadata.labelsToAdd.isNotEmpty()) {
                        append("\nSET n${getLabelsAsString(metadata.labelsToAdd)}")
                    }
                    if (metadata.labelsToDelete.isNotEmpty()) {
                        append("\nREMOVE n${getLabelsAsString(metadata.labelsToDelete)}")
                    }
                }
                QueryEvents(query, batch)
            }
    }

    /** Constraints of the event schema that satisfy [filter]. */
    private fun getNodeConstraints(event: StreamsTransactionEvent,
                                   filter: (Constraint) -> Boolean): List<Constraint> = event.schema.constraints.filter { filter(it) }

}
/**
 * Label and property name used by [SourceIdIngestionStrategy] to identify entities.
 */
data class SourceIdIngestionStrategyConfig(val labelName: String = "SourceEvent", val idName: String = "sourceId")

/**
 * [IngestionStrategy] that identifies every entity through a single label and
 * a single id property, both taken (quoted) from the configuration.
 */
class SourceIdIngestionStrategy(config: SourceIdIngestionStrategyConfig = SourceIdIngestionStrategyConfig()): IngestionStrategy {

    private val quotedLabelName = config.labelName.quote()
    private val quotedIdName = config.idName.quote()

    /** One MERGE query per relationship label, for non-delete relationship events. */
    override fun mergeRelationshipEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        return events
            .mapNotNull { entity ->
                toStreamsTransactionEvent(entity) { it.payload.type == EntityType.relationship && it.meta.operation != OperationType.deleted }
            }
            .map { event ->
                val payload = event.payload as RelationshipPayload
                // NOTE(review): the deleted case looks unreachable given the filter above - confirm
                val change = if (event.meta.operation == OperationType.deleted) {
                    event.payload.before as RelationshipChange
                } else {
                    event.payload.after as RelationshipChange
                }
                payload.label to mapOf("id" to payload.id,
                        "start" to payload.start.id, "end" to payload.end.id, "properties" to change.properties)
            }
            .groupBy({ it.first }, { it.second })
            .map { (label, batch) ->
                val query = """
                    |${KafkaUtil.UNWIND}
                    |MERGE (start:$quotedLabelName{$quotedIdName: event.start})
                    |MERGE (end:$quotedLabelName{$quotedIdName: event.end})
                    |MERGE (start)-[r:${label.quote()}{$quotedIdName: event.id}]->(end)
                    |SET r = event.properties
                    |SET r.$quotedIdName = event.id
                """.trimMargin()
                QueryEvents(query, batch)
            }
    }

    /** One DELETE query per relationship label, for deleted relationship events. */
    override fun deleteRelationshipEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        return events
            .mapNotNull { entity ->
                toStreamsTransactionEvent(entity) { it.payload.type == EntityType.relationship && it.meta.operation == OperationType.deleted }
            }
            .map { event ->
                val payload = event.payload as RelationshipPayload
                payload.label to mapOf("id" to event.payload.id)
            }
            .groupBy({ it.first }, { it.second })
            .map { (label, batch) ->
                val query = "${KafkaUtil.UNWIND} MATCH ()-[r:${label.quote()}{$quotedIdName: event.id}]-() DELETE r"
                QueryEvents(query, batch)
            }
    }

    /** A single DETACH DELETE query for all deleted node events, or nothing when there are none. */
    override fun deleteNodeEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        val ids = events
            .mapNotNull { entity ->
                toStreamsTransactionEvent(entity) { it.payload.type == EntityType.node && it.meta.operation == OperationType.deleted }
            }
            .map { mapOf("id" to it.payload.id) }
        if (ids.isEmpty()) {
            return emptyList()
        }
        val query = "${KafkaUtil.UNWIND} MATCH (n:$quotedLabelName{$quotedIdName: event.id}) DETACH DELETE n"
        return listOf(QueryEvents(query, ids))
    }

    /**
     * One MERGE query per distinct set of label changes, for non-delete node
     * events; label removals are appended before label additions.
     */
    override fun mergeNodeEvents(events: Collection<StreamsSinkEntity>): List<QueryEvents> {
        return events
            .mapNotNull { entity ->
                toStreamsTransactionEvent(entity) { it.payload.type == EntityType.node && it.meta.operation != OperationType.deleted }
            }
            .map { event ->
                val changeEvtAfter = event.payload.after as NodeChange
                val labelsAfter = changeEvtAfter.labels ?: emptyList()
                val labelsBefore = (event.payload.before as NodeChange?)?.labels ?: emptyList()
                val metadata = NodeMergeMetadata(
                        labelsToAdd = (labelsAfter - labelsBefore).toSet(),
                        labelsToDelete = (labelsBefore - labelsAfter).toSet())
                metadata to mapOf("id" to event.payload.id, "properties" to changeEvtAfter.properties)
            }
            .groupBy({ it.first }, { it.second })
            .map { (metadata, batch) ->
                val base = """
                    |${KafkaUtil.UNWIND}
                    |MERGE (n:$quotedLabelName{$quotedIdName: event.id})
                    |SET n = event.properties
                    |SET n.$quotedIdName = event.id
                """.trimMargin()
                val query = buildString {
                    append(base)
                    if (metadata.labelsToDelete.isNotEmpty()) {
                        append("\nREMOVE n${getLabelsAsString(metadata.labelsToDelete)}")
                    }
                    if (metadata.labelsToAdd.isNotEmpty()) {
                        append("\nSET n${getLabelsAsString(metadata.labelsToAdd)}")
                    }
                }
                QueryEvents(query, batch)
            }
    }

}
= null): StreamsPoint() +data class StreamsPointWgs(override val crs: String, val latitude: Double, val longitude: Double, val height: Double? = null): StreamsPoint() + +fun Point.toStreamsPoint(): StreamsPoint { + val crsType = this.crs.type + val coordinate = this.coordinates[0].coordinate + return when (this.crs) { + CoordinateReferenceSystem.CARTESIAN -> StreamsPointCartesian(crsType, coordinate[0], coordinate[1]) + CoordinateReferenceSystem.CARTESIAN_3D -> StreamsPointCartesian(crsType, coordinate[0], coordinate[1], coordinate[2]) + CoordinateReferenceSystem.WGS_84 -> StreamsPointWgs(crsType, coordinate[0], coordinate[1]) + CoordinateReferenceSystem.WGS_84_3D -> StreamsPointWgs(crsType, coordinate[0], coordinate[1], coordinate[2]) + else -> throw IllegalArgumentException("Point type $crsType not supported") + } +} + +fun PointValue.toStreamsPoint(): StreamsPoint { + val point = this.asPoint() + return when (val crsType = point.srid()) { + CoordinateReferenceSystem.CARTESIAN.code -> StreamsPointCartesian(CoordinateReferenceSystem.CARTESIAN.name, point.x(), point.y()) + CoordinateReferenceSystem.CARTESIAN_3D.code -> StreamsPointCartesian(CoordinateReferenceSystem.CARTESIAN_3D.name, point.x(), point.y(), point.z()) + CoordinateReferenceSystem.WGS_84.code -> StreamsPointWgs(CoordinateReferenceSystem.WGS_84.name, point.x(), point.y()) + CoordinateReferenceSystem.WGS_84_3D.code -> StreamsPointWgs(CoordinateReferenceSystem.WGS_84_3D.name, point.x(), point.y(), point.z()) + else -> throw IllegalArgumentException("Point type $crsType not supported") + } +} + +class PointSerializer : JsonSerializer() { + @Throws(IOException::class, JsonProcessingException::class) + override fun serialize(value: Point?, jgen: JsonGenerator, + provider: SerializerProvider) { + if (value == null) { + return + } + jgen.writeObject(value.toStreamsPoint()) + } +} + +class PointValueSerializer : JsonSerializer() { + @Throws(IOException::class, JsonProcessingException::class) + override fun 
serialize(value: PointValue?, jgen: JsonGenerator, + provider: SerializerProvider) { + if (value == null) { + return + } + jgen.writeObject(value.toStreamsPoint()) + } +} + +class TemporalAccessorSerializer : JsonSerializer() { + @Throws(IOException::class, JsonProcessingException::class) + override fun serialize(value: TemporalAccessor?, jgen: JsonGenerator, + provider: SerializerProvider) { + if (value == null) { + return + } + jgen.writeString(value.toString()) + } +} + +// NOTE: it works differently from apoc.JSONUtil +object JSONUtils { + + private val OBJECT_MAPPER: ObjectMapper = jacksonObjectMapper() + private val STRICT_OBJECT_MAPPER: ObjectMapper = jacksonObjectMapper() + + init { + val module = SimpleModule("Neo4jKafkaSerializer") + KafkaUtil.ignoreExceptions({ module.addSerializer(Point::class.java, PointSerializer()) }, NoClassDefFoundError::class.java) // in case is loaded from + KafkaUtil.ignoreExceptions({ module.addSerializer(PointValue::class.java, PointValueSerializer()) }, NoClassDefFoundError::class.java) // in case is loaded from + module.addSerializer(TemporalAccessor::class.java, TemporalAccessorSerializer()) + OBJECT_MAPPER.registerModule(module) + OBJECT_MAPPER.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS) + OBJECT_MAPPER.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES) + STRICT_OBJECT_MAPPER.registerModule(module) + } + + fun getObjectMapper(): ObjectMapper = OBJECT_MAPPER + + fun getStrictObjectMapper(): ObjectMapper = STRICT_OBJECT_MAPPER + + fun asMap(any: Any): Map { + return OBJECT_MAPPER.convertValue(any, Map::class.java) + .mapKeys { it.key.toString() } + } + + fun writeValueAsString(any: Any): String { + return OBJECT_MAPPER.writeValueAsString(any) + } + + fun writeValueAsBytes(any: Any): ByteArray { + return OBJECT_MAPPER.writeValueAsBytes(any) + } + + fun readValue(value: ByteArray, valueType: Class?): T { + return getObjectMapper().readValue(value, valueType) + } + + fun readValue(value: ByteArray): Any { + return 
getObjectMapper().readValue(value)
+    }
+
+    /**
+     * Converts [value] to the requested type with [objectMapper] (the lenient shared mapper by default).
+     */
+    inline fun convertValue(value: Any, objectMapper: ObjectMapper = getObjectMapper()): T {
+        return objectMapper.convertValue(value)
+    }
+
+    /**
+     * Deserializes [obj] into a [StreamsTransactionEvent] using the strict mapper.
+     *
+     * The payload is first read as a node event; if that attempt throws any [Exception]
+     * it is read again as a relationship event. A failure of the second attempt is
+     * propagated to the caller.
+     *
+     * @param obj a JSON `String`, a JSON `ByteArray`, or any other object (converted with `convertValue`)
+     * @return the event produced by `toStreamsTransactionEvent()` on the deserialized value
+     */
+    fun asStreamsTransactionEvent(obj: Any): StreamsTransactionEvent {
+        return try {
+            readStrict(obj, StreamsTransactionNodeEvent::class.java).toStreamsTransactionEvent()
+        } catch (e: Exception) {
+            readStrict(obj, StreamsTransactionRelationshipEvent::class.java).toStreamsTransactionEvent()
+        }
+    }
+
+    /**
+     * Reads [obj] as an instance of [type] with the strict mapper.
+     *
+     * `String` and `ByteArray` get separate branches so that each one is smart-cast to its own
+     * `ObjectMapper.readValue` overload. A single shared branch that cast to `ByteArray` made every
+     * `String` payload fail with a `ClassCastException`.
+     */
+    private fun <T> readStrict(obj: Any, type: Class<T>): T = when (obj) {
+        is String -> STRICT_OBJECT_MAPPER.readValue(obj, type)
+        is ByteArray -> STRICT_OBJECT_MAPPER.readValue(obj, type)
+        else -> STRICT_OBJECT_MAPPER.convertValue(obj, type)
+    }
+}
\ No newline at end of file
diff --git a/extended/src/main/kotlin/apoc/kafka/utils/KafkaUtil.kt b/extended/src/main/kotlin/apoc/kafka/utils/KafkaUtil.kt
new file mode 100644
index 0000000000..e809d5863b
--- /dev/null
+++ b/extended/src/main/kotlin/apoc/kafka/utils/KafkaUtil.kt
@@ -0,0 +1,341 @@
+package apoc.kafka.utils
+
+import apoc.ApocConfig
+import apoc.ExtendedApocConfig.APOC_KAFKA_ENABLED
+import apoc.kafka.events.Constraint
+import apoc.kafka.events.RelKeyStrategy
+import apoc.kafka.events.StreamsConstraintType
+import apoc.kafka.events.StreamsTransactionEvent
+import apoc.kafka.extensions.execute
+import apoc.kafka.extensions.quote
+import apoc.kafka.service.StreamsSinkEntity
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.GlobalScope
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.runBlocking
+import org.apache.kafka.clients.CommonClientConfigs
+import org.apache.kafka.clients.admin.AdminClient
+import org.apache.kafka.clients.admin.AdminClientConfig
+import org.apache.kafka.clients.consumer.ConsumerConfig
+import
org.apache.kafka.clients.producer.ProducerConfig +import org.apache.kafka.common.config.ConfigResource +import org.apache.kafka.common.config.SaslConfigs +import org.apache.kafka.common.config.SslConfigs +import org.apache.kafka.common.config.TopicConfig +import org.neo4j.dbms.api.DatabaseManagementService +import org.neo4j.dbms.systemgraph.TopologyGraphDbmsModel.HostedOnMode +import org.neo4j.exceptions.UnsatisfiedDependencyException +import org.neo4j.graphdb.GraphDatabaseService +import org.neo4j.graphdb.QueryExecutionException +import org.neo4j.kernel.internal.GraphDatabaseAPI +import org.neo4j.logging.Log +import org.neo4j.logging.internal.LogService +import java.io.IOException +import java.lang.invoke.MethodHandles +import java.lang.invoke.MethodType +import java.lang.reflect.Modifier +import java.net.Socket +import java.net.URI +import java.util.* + +object KafkaUtil { + const val labelSeparator = ":" + const val keySeparator = ", " + + @JvmStatic val UNWIND: String = "UNWIND \$events AS event" + + @JvmStatic val WITH_EVENT_FROM: String = "WITH event, from" + + @JvmStatic val LEADER = "LEADER" + + @JvmStatic val SYSTEM_DATABASE_NAME = "system" + + @JvmStatic + private val coreMetadata: Class<*>? = try { + Class.forName("com.neo4j.causalclustering.core.consensus.CoreMetaData") + } catch (e: ClassNotFoundException) { + null + } + + @JvmStatic + private val isLeaderMethodHandle = coreMetadata?.let { + val lookup = MethodHandles.lookup() + lookup.findVirtual(it, "isLeader", MethodType.methodType(Boolean::class.java)) + .asType(MethodType.methodType(Boolean::class.java, Any::class.java)) + } + + fun clusterMemberRole(db: GraphDatabaseAPI): String { + val fallback: (Exception?) -> String = { e: Exception? 
-> + val userLog = db.dependencyResolver + .resolveDependency(LogService::class.java) + .getUserLog(KafkaUtil::class.java) + e?.let { userLog.warn("Cannot call the APIs, trying with the cypher query", e) } + ?: userLog.warn("Cannot call the APIs, trying with the cypher query") + db.execute("CALL dbms.cluster.role(\$database)", + mapOf("database" to db.databaseName()) + ) { it.columnAs("role").next() } + } + val execute = { + coreMetadata?.let { + try { + val raftMachine: Any = db.dependencyResolver.resolveDependency(coreMetadata) + val isLeader = isLeaderMethodHandle!!.invokeExact(raftMachine) as Boolean + if (isLeader) "LEADER" else "FOLLOWER" + } catch (e: UnsatisfiedDependencyException) { + "LEADER" + } + } ?: "LEADER" + } + return executeOrFallback(execute, fallback) + } + + fun isCluster(db: GraphDatabaseAPI): Boolean = db.mode() != HostedOnMode.SINGLE && db.mode() != HostedOnMode.VIRTUAL + + fun isCluster(dbms: DatabaseManagementService): Boolean = dbms.listDatabases() + .firstOrNull { it != KafkaUtil.SYSTEM_DATABASE_NAME } + ?.let { dbms.database(it) as GraphDatabaseAPI } + ?.let { isCluster(it) } ?: false + + private fun executeOrFallback(execute: () -> T, fallback: (Exception?) 
-> T): T = try {
+        execute()
+    } catch (e: Exception) {
+        fallback(e)
+    }
+
+    /**
+     * Quotes every label and joins them with [labelSeparator], prepending one leading separator.
+     * Returns the joined string unchanged (no leading separator) when it is blank.
+     */
+    fun getLabelsAsString(labels: Collection): String = labels
+        .map { it.quote() }
+        .joinToString(labelSeparator)
+        .let { if (it.isNotBlank()) "$labelSeparator$it" else it }
+
+    /**
+     * Builds the `key: event.<prefix>.key` fragments for [keys], joined with [keySeparator].
+     */
+    fun getNodeKeysAsString(prefix: String = "properties", keys: Set): String = keys
+        .map { toQuotedProperty(prefix, it) }
+        .joinToString(keySeparator)
+
+    /** Renders a single quoted property as `<quoted>: event.<prefix>.<quoted>`. */
+    private fun toQuotedProperty(prefix: String = "properties", property: String): String {
+        val quoted = property.quote()
+        return "$quoted: event.$prefix.$quoted"
+    }
+
+    /**
+     * Same rendering as [getNodeKeysAsString] but with a mandatory [prefix].
+     * Delegates to [toQuotedProperty] so both functions share one format.
+     */
+    fun getNodeMergeKeys(prefix: String, keys: Set): String = keys
+        .map { toQuotedProperty(prefix, it) }
+        .joinToString(keySeparator)
+
+    /**
+     * Checks whether [key] is listed in [properties]. For a dotted key, a listed property that is
+     * a dot-terminated prefix of [key] (e.g. `a` for `a.b`) also counts as a match.
+     */
+    fun containsProp(key: String, properties: List): Boolean = if (key.contains(".")) {
+        properties.contains(key) || properties.any { key.startsWith("$it.") }
+    } else {
+        properties.contains(key)
+    }
+
+    /**
+     * Runs [action]; when it throws an exception that is an instance of one of [exceptions] and
+     * [retries] is still positive, suspends for [delayTime] milliseconds and tries again with one
+     * retry less. Any other exception, or exhaustion of the retries, rethrows the exception.
+     */
+    suspend fun retryForException(exceptions: Array>, retries: Int, delayTime: Long, action: () -> T): T {
+        return try {
+            action()
+        } catch (e: Exception) {
+            val isInstance = exceptions.any { it.isInstance(e) }
+            if (isInstance && retries > 0) {
+                delay(delayTime)
+                retryForException(exceptions = exceptions, retries = retries - 1, delayTime = delayTime, action = action)
+            } else {
+                throw e
+            }
+        }
+    }
+
+    /**
+     * Returns `true` when a TCP socket to [url]:[port] can be opened, `false` on [IOException].
+     * The socket is closed immediately.
+     */
+    fun isServerReachable(url: String, port: Int): Boolean = try {
+        Socket(url, port).use { true }
+    } catch (e: IOException) {
+        false
+    }
+
+    /**
+     * Splits [urls] by [separator] and returns the entries that cannot be reached via
+     * [isServerReachable].
+     *
+     * Each entry is trimmed and then parsed as a URI. Entries without a parsable host, i.e. plain
+     * `host:port` pairs, are handled by splitting on `:`. This also covers entries that
+     * `URI.create` rejects outright, such as `127.0.0.1:9092`, where the text before the colon is
+     * not a legal URI scheme and an `IllegalArgumentException` is thrown.
+     *
+     * @return unreachable entries, rendered as `host:port` for plain pairs and as the URI text otherwise
+     * @throws NumberFormatException when a plain entry has no numeric port after the last `:`
+     */
+    fun checkServersUnreachable(urls: String, separator: String = ","): List = urls
+        .split(separator)
+        .map { it.trim() }
+        .map { address ->
+            // URI.create throws for inputs like "10.0.0.1:9092"; treat that the same as "no host found".
+            val uri = try {
+                URI.create(address)
+            } catch (e: IllegalArgumentException) {
+                null
+            }
+            if (uri == null || uri.host.isNullOrBlank()) {
+                val splitted = address.split(":")
+                URI("fake-scheme", "", splitted.first(), splitted.last().toInt(),
+                    "", "", "")
+            } else {
+                uri
+            }
+        }
+        .filter { uri -> !isServerReachable(uri.host, uri.port) }
+        .map { if (it.scheme == "fake-scheme") "${it.host}:${it.port}" else it.toString() }
+
+    fun
validateConnection(url: String, kafkaPropertyKey: String, checkReachable: Boolean = true) {
+        if (url.isBlank()) {
+            throw RuntimeException("The `kafka.$kafkaPropertyKey` property is empty")
+        } else if (checkReachable) {
+            val unreachableServers = checkServersUnreachable(url)
+            if (unreachableServers.isNotEmpty()) {
+                throw RuntimeException("The servers defined into the property `kafka.$kafkaPropertyKey` are not reachable: $unreachableServers")
+            }
+        }
+    }
+
+    /** Builds the error message listing the topics that must be created manually. */
+    fun getInvalidTopicsError(invalidTopics: List) = "The BROKER config `auto.create.topics.enable` is false, the following topics need to be created into the Kafka cluster otherwise the messages will be discarded: $invalidTopics"
+
+    /**
+     * Creates a temporary [AdminClient] from [kafkaProps], delegates to the client-based overload
+     * and closes the client afterwards so it is not leaked.
+     * Any exception (including client creation failure) yields an empty list.
+     */
+    fun getInvalidTopics(kafkaProps: Properties, allTopics: List): List = try {
+        val client = AdminClient.create(kafkaProps)
+        try {
+            getInvalidTopics(client, allTopics)
+        } finally {
+            // the client is created here and never handed out, so it must be released here
+            client.close()
+        }
+    } catch (e: Exception) {
+        emptyList()
+    }
+
+    /**
+     * Returns the entries of [allTopics] that are not known to the cluster behind [client].
+     * The result is empty when the broker auto-creates topics, and also when any exception occurs.
+     * The caller keeps ownership of [client]; it is not closed here.
+     */
+    fun getInvalidTopics(client: AdminClient, allTopics: List): List = try {
+        val kafkaTopics = client.listTopics().names().get()
+        val invalidTopics = allTopics.filter { !kafkaTopics.contains(it) }
+        if (invalidTopics.isNotEmpty() && isAutoCreateTopicsEnabled(client)) {
+            emptyList()
+        } else {
+            invalidTopics
+        }
+    } catch (e: Exception) {
+        emptyList()
+    }
+
+    /**
+     * Throws a [RuntimeException] unless the [APOC_KAFKA_ENABLED] setting is `true` in the APOC config.
+     */
+    fun checkEnabled() {
+        if (!ApocConfig.apocConfig().getBoolean(APOC_KAFKA_ENABLED)) {
+            throw RuntimeException("In order to use the Kafka procedures you must set ${APOC_KAFKA_ENABLED}=true")
+        }
+    }
+
+    /**
+     * Creates a temporary [AdminClient] from [kafkaProps], delegates to the client-based overload
+     * and closes the client afterwards so it is not leaked.
+     * Any exception (including client creation failure) yields `false`.
+     */
+    fun isAutoCreateTopicsEnabled(kafkaProps: Properties):Boolean = try {
+        val client = AdminClient.create(kafkaProps)
+        try {
+            isAutoCreateTopicsEnabled(client)
+        } finally {
+            client.close()
+        }
+    } catch (e: Exception) {
+        false
+    }
+
+    /**
+     * Reads the `auto.create.topics.enable` broker config from the first node reported by the cluster.
+     * Returns `false` when the entry is missing or any exception occurs. [client] is not closed here.
+     */
+    fun isAutoCreateTopicsEnabled(client: AdminClient): Boolean = try {
+        val firstNodeId = client.describeCluster().nodes().get().first().id()
+        val configResources = listOf(ConfigResource(ConfigResource.Type.BROKER, firstNodeId.toString()))
+        val configs = client.describeConfigs(configResources).all().get()
+        configs.values
+            .flatMap { it.entries() }
+            .find { it.name() ==
"auto.create.topics.enable" } + ?.value() + ?.toBoolean() ?: false + } catch (e: Exception) { + false + } + + private fun getConfigProperties(clazz: Class<*>) = clazz.declaredFields + .filter { Modifier.isStatic(it.modifiers) && it.name.endsWith("_CONFIG") } + .map { it.get(null).toString() } + .toSet() + + private fun getBaseConfigs() = (getConfigProperties(CommonClientConfigs::class.java) + + AdminClientConfig.configNames() + + getConfigProperties(SaslConfigs::class.java) + + getConfigProperties(TopicConfig::class.java) + + getConfigProperties(SslConfigs::class.java)) + + fun getProducerProperties() = ProducerConfig.configNames() - getBaseConfigs() + + fun getConsumerProperties() = ConsumerConfig.configNames() - getBaseConfigs() + + fun getNodeKeys(labels: List, propertyKeys: Set, constraints: List, keyStrategy: RelKeyStrategy = RelKeyStrategy.DEFAULT): Set = + constraints + .filter { constraint -> + constraint.type == StreamsConstraintType.UNIQUE + && propertyKeys.containsAll(constraint.properties) + && labels.contains(constraint.label) + } + .let { + when(keyStrategy) { + RelKeyStrategy.DEFAULT -> { + // we order first by properties.size, then by label name and finally by properties name alphabetically + // with properties.sorted() we ensure that ("foo", "bar") and ("bar", "foo") are no different + // with toString() we force it.properties to have the natural sort order, that is alphabetically + it.minWithOrNull((compareBy({ it.properties.size }, { it.label }, { it.properties.sorted().toString() }))) + ?.properties + .orEmpty() + } + // with 'ALL' strategy we get a set with all properties + RelKeyStrategy.ALL -> it.flatMap { it.properties }.toSet() + } + } + + + fun toStreamsTransactionEvent(streamsSinkEntity: StreamsSinkEntity, + evaluation: (StreamsTransactionEvent) -> Boolean) + : StreamsTransactionEvent? 
= if (streamsSinkEntity.value != null) {
+        val data = JSONUtils.asStreamsTransactionEvent(streamsSinkEntity.value)
+        if (evaluation(data)) data else null
+    } else {
+        null
+    }
+
+    /**
+     * Runs [action] and returns its result, or `null` when it throws something that should be ignored.
+     *
+     * A throwable is ignored when [toIgnore] is empty (everything is ignored) or when it is an
+     * instance of one of the given classes; anything else is rethrown.
+     */
+    fun ignoreExceptions(action: () -> T, vararg toIgnore: Class): T? {
+        return try {
+            action()
+        } catch (e: Throwable) {
+            if (toIgnore.isEmpty() || toIgnore.any { it.isInstance(e) }) {
+                null
+            } else {
+                throw e
+            }
+        }
+    }
+
+    /**
+     * Blocks the calling thread, invoking [action] every [delay] milliseconds until it returns
+     * `true` or [timeout] milliseconds have elapsed.
+     *
+     * @return the result of the last [action] invocation
+     */
+    fun blockUntilFalseOrTimeout(timeout: Long, delay: Long = 1000, action: () -> Boolean): Boolean = runBlocking {
+        val start = System.currentTimeMillis()
+        var success = action()
+        while (System.currentTimeMillis() - start < timeout && !success) {
+            delay(delay)
+            success = action()
+        }
+        success
+    }
+
+    /** Returns the name of the given database. */
+    fun getName(db: GraphDatabaseService) = db.databaseName()
+
+    /** Delegates to `apoc.util.Util.isWriteableInstance`. */
+    fun isWriteableInstance(db: GraphDatabaseAPI) = apoc.util.Util.isWriteableInstance(db)
+
+    /**
+     * Checks through `dbms.cluster.overview()` whether a [LEADER] role is reported for the database of [db].
+     *
+     * @return `false` when the procedure does not exist on this instance (status code
+     * `Neo.ClientError.Procedure.ProcedureNotFound`)
+     * @throws QueryExecutionException for any other query failure
+     */
+    private fun clusterHasLeader(db: GraphDatabaseAPI): Boolean = try {
+        db.execute("""
+            |CALL dbms.cluster.overview() YIELD databases
+            |RETURN databases[${'$'}database] AS role
+        """.trimMargin(), mapOf("database" to db.databaseName())) {
+            it.columnAs("role")
+                .stream()
+                .toList()
+                .contains(KafkaUtil.LEADER)
+        }
+    } catch (e: QueryExecutionException) {
+        // The `false` must be the value of the catch expression; without the `else`
+        // it was a discarded statement and the exception was always rethrown.
+        if (e.statusCode.equals("Neo.ClientError.Procedure.ProcedureNotFound", ignoreCase = true)) {
+            false
+        } else {
+            throw e
+        }
+    }
+
+    fun executeInWriteableInstance(db: GraphDatabaseAPI,
+                                   action: () -> T?): T?
= if (isWriteableInstance(db)) { + action() + } else { + null + } + + fun isClusterCorrectlyFormed(dbms: DatabaseManagementService) = dbms.listDatabases() + .filterNot { it == KafkaUtil.SYSTEM_DATABASE_NAME } + .map { dbms.database(it) as GraphDatabaseAPI } + .all { clusterHasLeader(it) } + + fun waitForTheLeaders(dbms: DatabaseManagementService, log: Log, timeout: Long = 120000, action: () -> Unit) { + GlobalScope.launch(Dispatchers.IO) { + val start = System.currentTimeMillis() + val delay: Long = 2000 + while (!isClusterCorrectlyFormed(dbms) && System.currentTimeMillis() - start < timeout) { + log.info("${KafkaUtil.LEADER} not found, new check comes in $delay milliseconds...") + delay(delay) + } + action() + } + } +} \ No newline at end of file diff --git a/extended/src/main/kotlin/apoc/kafka/utils/Neo4jUtils.kt b/extended/src/main/kotlin/apoc/kafka/utils/Neo4jUtils.kt new file mode 100644 index 0000000000..845f3f96a9 --- /dev/null +++ b/extended/src/main/kotlin/apoc/kafka/utils/Neo4jUtils.kt @@ -0,0 +1 @@ +package apoc.kafka.utils diff --git a/extended/src/test/java/apoc/custom/CypherProcedureTestUtil.java b/extended/src/test/java/apoc/custom/CypherProcedureTestUtil.java index 8a627996b3..0c22ddafef 100644 --- a/extended/src/test/java/apoc/custom/CypherProcedureTestUtil.java +++ b/extended/src/test/java/apoc/custom/CypherProcedureTestUtil.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.util.Map; +import static apoc.ExtendedApocConfig.APOC_KAFKA_ENABLED; import static apoc.custom.CypherProceduresHandler.*; import static apoc.util.DbmsTestUtil.startDbWithApocConfigs; import static apoc.util.SystemDbTestUtil.PROCEDURE_DEFAULT_REFRESH; @@ -20,7 +21,10 @@ public class CypherProcedureTestUtil { public final static String QUERY_CREATE = "RETURN $input1 + $input2 as answer"; public static DatabaseManagementService startDbWithCustomApocConfigs(TemporaryFolder storeDir) throws IOException { return startDbWithApocConfigs(storeDir, - 
Map.of(CUSTOM_PROCEDURES_REFRESH, PROCEDURE_DEFAULT_REFRESH) + Map.of( + CUSTOM_PROCEDURES_REFRESH, PROCEDURE_DEFAULT_REFRESH, + APOC_KAFKA_ENABLED, "true" + ) ); } diff --git a/extended/src/test/java/apoc/custom/CypherProceduresStorageTest.java b/extended/src/test/java/apoc/custom/CypherProceduresStorageTest.java index 40a85b8186..2685e0ec02 100644 --- a/extended/src/test/java/apoc/custom/CypherProceduresStorageTest.java +++ b/extended/src/test/java/apoc/custom/CypherProceduresStorageTest.java @@ -25,7 +25,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; -import static apoc.custom.CypherProcedureTestUtil.QUERY_CREATE; +import static apoc.ExtendedApocConfig.APOC_KAFKA_ENABLED; import static apoc.custom.CypherProceduresHandler.CUSTOM_PROCEDURES_REFRESH; import static apoc.util.DbmsTestUtil.startDbWithApocConfigs; import static apoc.util.MapUtil.map; @@ -57,7 +57,10 @@ public void setUp() throws Exception { final int refreshTime = 3000; // start db with apoc.conf: `apoc.custom.procedures.refresh=