feat: Allow using ngram_tokenizer when indexing a field - MEED-7667 - …

…Meeds-io/meeds#2523 (#663) This change allows a field to be indexed so that it supports autocomplete behavior with partial matching of a word.
Meeds-io · Oct 25, 2024 · 92acacd · 92acacd
1 parent db1978a
commit 92acacd
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 12 deletions.
diff --git a/...ain/java/org/exoplatform/commons/search/index/impl/ElasticIndexingOperationProcessor.java b/...ain/java/org/exoplatform/commons/search/index/impl/ElasticIndexingOperationProcessor.java
@@ -731,17 +731,22 @@ private void initConnectors() {
 
       boolean needsUpgrade = false;
       if (StringUtils.isNotBlank(previousIndex)) {
+        if (connector.getPreviousIndices() != null) {
+          previousIndex = connector.getPreviousIndices()
+                                   .stream()
+                                   .filter(i -> elasticIndexingClient.sendIsIndexExistsRequest(i))
+                                   .findFirst()
+                                   .orElse(previousIndex);
+          connector.setPreviousIndex(previousIndex);
+        }
         // Need to check the upgrade status (incomplete/ not run == new index doesn't exist or indexAlias is not added to new index)
         needsUpgrade = elasticIndexingClient.sendIsIndexExistsRequest(previousIndex)
                 && (!elasticIndexingClient.sendIsIndexExistsRequest(index)
                 || !elasticIndexingClient.sendGetIndexAliasesRequest(index).contains(indexAlias));
       }
 
-      if(needsUpgrade) {
-        if(!indexUpgrading.containsKey(indexAlias)) {
-          indexUpgrading.put(indexAlias, new HashSet<>());
-        }
-        indexUpgrading.get(indexAlias).add(entry.getKey());
+      if (needsUpgrade) {
+        indexUpgrading.computeIfAbsent(indexAlias, k -> new HashSet<>()).add(entry.getKey());
       }
     }
     for (Map.Entry<String, IndexingServiceConnector> entry : getConnectors().entrySet()) {

diff --git a/.../main/java/org/exoplatform/commons/search/index/impl/ElasticIndexingServiceConnector.java b/.../main/java/org/exoplatform/commons/search/index/impl/ElasticIndexingServiceConnector.java
@@ -17,6 +17,8 @@
 package org.exoplatform.commons.search.index.impl;
 
 import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
 
 import org.apache.commons.lang3.StringUtils;
 import org.json.simple.JSONObject;
@@ -31,6 +33,8 @@
 import org.exoplatform.services.log.ExoLogger;
 import org.exoplatform.services.log.Log;
 
+import lombok.Getter;
+
 /**
  * Created by The eXo Platform SAS
  * Author : Thibault Clement
@@ -48,19 +52,31 @@ public abstract class ElasticIndexingServiceConnector extends IndexingServiceCon
 
   private static final String  MAPPING_FILE_PATH_PARAM       = "mapping.file.path";
 
-  protected String indexAlias;
-  protected String currentIndex;
-  protected String previousIndex;
-  protected String mapping;
-  protected boolean reindexOnUpgrade;
-  protected Integer shards = SHARDS_NUMBER_DEFAULT;
-  protected Integer replicas = REPLICAS_NUMBER_DEFAULT;
+  protected String             indexAlias;
+
+  protected String             currentIndex;
+
+  protected String             previousIndex;
+
+  @Getter
+  protected List<String>       previousIndices;
+
+  protected String             mapping;
+
+  protected boolean            reindexOnUpgrade;
+
+  protected Integer            shards                        = SHARDS_NUMBER_DEFAULT;
+
+  protected Integer            replicas                      = REPLICAS_NUMBER_DEFAULT;
 
   protected ElasticIndexingServiceConnector(InitParams initParams) {
     PropertiesParam param = initParams.getPropertiesParam("constructor.params");
 
     this.currentIndex = param.getProperty("index_current");
     this.previousIndex = param.getProperty("index_previous");
+    if (StringUtils.contains(this.previousIndex, ",")) {
+      this.previousIndices = Arrays.asList(this.previousIndex.split(","));
+    }
     String reindexOnUpgradeString = param.getProperty("reindexOnUpgrade");
     if (StringUtils.isBlank(this.currentIndex)) {
       throw new IllegalStateException("Connector ES index name is mandatory.");

diff --git a/commons-search/src/main/resources/es-default-index-settings.json b/commons-search/src/main/resources/es-default-index-settings.json
@@ -2,6 +2,7 @@
    "number_of_shards": shard.number,
    "number_of_replicas": replica.number,
    "max_regex_length": max_regex.length,
+   "max_ngram_diff" : "50",
    "analysis":{
       "analyzer":{
          "default":{
@@ -38,6 +39,28 @@
             "char_filter": [
                "html_strip"
             ]
+         },
+         "ngram_analyzer":{
+            "type":"custom",
+            "tokenizer":"ngram_tokenizer",
+            "filter":[
+               "lowercase",
+               "asciifolding"
+            ]
+         },
+         "ngram_analyzer_search": {
+           "tokenizer": "lowercase"
+         }
+      },
+      "tokenizer":{
+         "ngram_tokenizer":{
+           "type": "ngram",
+           "min_gram": 1,
+           "max_gram": 50,
+           "token_chars": [
+             "letter",
+             "digit"
+           ]
          }
       }
    }