Skip to content

Commit

Permalink
feat: Allow to use ngram_tokenizer when indexing a field - MEED-7667 -
Browse files Browse the repository at this point in the history
…Meeds-io/meeds#2523 (#663)

This change allows a field to be indexed with an ngram tokenizer, enabling
autocomplete behavior through partial matching of a word.
  • Loading branch information
boubaker committed Oct 25, 2024
1 parent db1978a commit 92acacd
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -731,17 +731,22 @@ private void initConnectors() {

boolean needsUpgrade = false;
if (StringUtils.isNotBlank(previousIndex)) {
if (connector.getPreviousIndices() != null) {
previousIndex = connector.getPreviousIndices()
.stream()
.filter(i -> elasticIndexingClient.sendIsIndexExistsRequest(i))
.findFirst()
.orElse(previousIndex);
connector.setPreviousIndex(previousIndex);
}
// Need to check the upgrade status (incomplete/ not run == new index doesn't exist or indexAlias is not added to new index)
needsUpgrade = elasticIndexingClient.sendIsIndexExistsRequest(previousIndex)
&& (!elasticIndexingClient.sendIsIndexExistsRequest(index)
|| !elasticIndexingClient.sendGetIndexAliasesRequest(index).contains(indexAlias));
}

if(needsUpgrade) {
if(!indexUpgrading.containsKey(indexAlias)) {
indexUpgrading.put(indexAlias, new HashSet<>());
}
indexUpgrading.get(indexAlias).add(entry.getKey());
if (needsUpgrade) {
indexUpgrading.computeIfAbsent(indexAlias, k -> new HashSet<>()).add(entry.getKey());
}
}
for (Map.Entry<String, IndexingServiceConnector> entry : getConnectors().entrySet()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package org.exoplatform.commons.search.index.impl;

import java.io.InputStream;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.json.simple.JSONObject;
Expand All @@ -31,6 +33,8 @@
import org.exoplatform.services.log.ExoLogger;
import org.exoplatform.services.log.Log;

import lombok.Getter;

/**
* Created by The eXo Platform SAS
* Author : Thibault Clement
Expand All @@ -48,19 +52,31 @@ public abstract class ElasticIndexingServiceConnector extends IndexingServiceCon

private static final String MAPPING_FILE_PATH_PARAM = "mapping.file.path";

protected String indexAlias;
protected String currentIndex;
protected String previousIndex;
protected String mapping;
protected boolean reindexOnUpgrade;
protected Integer shards = SHARDS_NUMBER_DEFAULT;
protected Integer replicas = REPLICAS_NUMBER_DEFAULT;
protected String indexAlias;

protected String currentIndex;

protected String previousIndex;

@Getter
protected List<String> previousIndices;

protected String mapping;

protected boolean reindexOnUpgrade;

protected Integer shards = SHARDS_NUMBER_DEFAULT;

protected Integer replicas = REPLICAS_NUMBER_DEFAULT;

protected ElasticIndexingServiceConnector(InitParams initParams) {
PropertiesParam param = initParams.getPropertiesParam("constructor.params");

this.currentIndex = param.getProperty("index_current");
this.previousIndex = param.getProperty("index_previous");
if (StringUtils.contains(this.previousIndex, ",")) {
this.previousIndices = Arrays.asList(this.previousIndex.split(","));
}
String reindexOnUpgradeString = param.getProperty("reindexOnUpgrade");
if (StringUtils.isBlank(this.currentIndex)) {
throw new IllegalStateException("Connector ES index name is mandatory.");
Expand Down
23 changes: 23 additions & 0 deletions commons-search/src/main/resources/es-default-index-settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"number_of_shards": shard.number,
"number_of_replicas": replica.number,
"max_regex_length": max_regex.length,
"max_ngram_diff" : "50",
"analysis":{
"analyzer":{
"default":{
Expand Down Expand Up @@ -38,6 +39,28 @@
"char_filter": [
"html_strip"
]
},
"ngram_analyzer":{
"type":"custom",
"tokenizer":"ngram_tokenizer",
"filter":[
"lowercase",
"asciifolding"
]
},
"ngram_analyzer_search": {
"tokenizer": "lowercase"
}
},
"tokenizer":{
"ngram_tokenizer":{
"type": "ngram",
"min_gram": 1,
"max_gram": 50,
"token_chars": [
"letter",
"digit"
]
}
}
}
Expand Down

0 comments on commit 92acacd

Please sign in to comment.