feat(search): find search terms across all keys

This ensures we properly handle fuzzy results (again), where parts of the search are matched across different keys, so that we are not overly strict when filtering. Related to bpmn-io/bpmn-js#2235
bpmn-io · Nov 4, 2024 · 0b6a26b · 0b6a26b
1 parent e53ecb7
commit 0b6a26b
Show file tree

Hide file tree

Showing 2 changed files with 235 additions and 109 deletions.
diff --git a/lib/features/search/search.js b/lib/features/search/search.js
@@ -32,16 +32,12 @@ export default function search(items, pattern, options) {
     keys
   } = options;
 
-  // drop leading and trailing whitespace
-  pattern = pattern.trim();
+  const words = pattern.trim().toLowerCase().split(/\s+/);
 
   return items.flatMap((item) => {
-    const {
-      __matches: matches,
-      ...tokens
-    } = getTokens(item, pattern, keys);
+    const tokens = matchItem(item, words, keys);
 
-    if (!matches) {
+    if (!tokens) {
       return [];
     }
 
@@ -53,26 +49,48 @@ export default function search(items, pattern, options) {
 }
 
 /**
  * Match an item against the given search words and return its tokens
  * in case of a match.
  *
  * Each key is matched on its own via `matchString`; the words found in
  * the individual keys are merged, so different words of the search may
  * be found in different keys of the same item.
  *
  * @param {Object} item
  * @param {string[]} words
  * @param {string[]} keys
  *
  * @returns {Record<string, Tokens>|null} tokens per key, or `null` if
  *   at least one word was not found in any of the keys
  */
 function matchItem(item, words, keys) {

   const {
     matchedWords,
     tokens
   } = keys.reduce((result, key) => {
     const string = item[ key ];

     const {
       tokens,
       matchedWords
     } = matchString(string, words);

     return {
       tokens: {
         ...result.tokens,
         [ key ]: tokens
       },
       matchedWords: {
         ...result.matchedWords,
         ...matchedWords
       }
     };
   }, {
     matchedWords: {},
     tokens: {}
   });

   // <matchedWords> is an object and therefore free of duplicates;
   // compare it against the number of _distinct_ words, otherwise a
   // pattern that repeats a word (e.g. "foo foo") could never match
   const distinctWords = new Set(words);

   // only return result if every word got matched
   if (Object.keys(matchedWords).length !== distinctWords.size) {
     return null;
   }

   return tokens;
 }
 
 /**
@@ -118,119 +136,128 @@ function createResultSorter(keys) {
 }
 
 /**
  * Compares two token arrays by their score, so that the array with
  * the higher score sorts first.
  *
  * Both arguments are optional; a missing array is treated as an empty
  * one (the signature documents them as optional, and passing
  * `undefined` on to `scoreTokens` would throw).
  *
  * @param {Token[]} [tokensA]
  * @param {Token[]} [tokensB]
  *
  * @returns {number} negative if <tokensA> scores higher than <tokensB>,
  *   positive if it scores lower, 0 if both score the same
  */
 function compareTokens(tokensA = [], tokensB = []) {
   return scoreTokens(tokensB) - scoreTokens(tokensA);
 }
 
 /**
  * Compute the score of a token list by adding up the scores
  * of its individual tokens.
  *
  * @param { Token[] } tokens
  * @returns { number }
  */
 function scoreTokens(tokens) {
   let total = 0;

   tokens.forEach((token) => {
     total = total + scoreToken(token);
   });

   return total;
 }
 
 /**
  * Score a single token.
  *
  * A token that is not a match scores 0. A match flagged `start`
  * scores highest, a match flagged `wordStart` second, any other
  * match scores 1.
  *
  * @param { Token } token
  *
  * @returns { number }
  */
 function scoreToken(token) {
   const { match, start, wordStart } = token;

   if (!match) {
     return 0;
   }

   if (start) {
     return 1.37;
   }

   if (wordStart) {
     return 1.13;
   }

   return 1;
 }
 
 /**
  * Compares two strings using `String#localeCompare`.
  *
  * Missing values (`undefined` as well as `null`) are compared as the
  * empty string. Parameter defaults alone only cover `undefined`: a
  * `null` first argument would throw, and a `null` second argument
  * would be compared as the text "null".
  *
  * @param {string|null} [a = '']
  * @param {string|null} [b = '']
  *
  * @returns {number}
  */
 function compareStrings(a = '', b = '') {
   const left = a === null ? '' : a;
   const right = b === null ? '' : b;

   return left.localeCompare(right);
 }
 
 /**
  * Match a given string against a set of words,
  * and return the result.
  *
  * The string is split into consecutive tokens: matched parts carry
  * `match: true` plus the flags `wordStart` (the match begins at a
  * regexp word boundary) and `start` (the match begins at index 0);
  * the text in between is returned as plain tokens.
  *
  * Matching is case insensitive. Empty words are ignored. A falsy
  * string yields no tokens and no matched words.
  *
  * @param {string} string
  * @param {string[]} words
  *
  * @return { {
  *   tokens: Token[],
  *   matchedWords: Record<string, boolean>
  * } } `matchedWords` is keyed by the lower-cased search word
  */
 function matchString(string, words) {

   const tokens = [];

   // prototype-less, so that any word (e.g. "__proto__") is a valid key
   const matchedWords = Object.create(null);

   if (!string) {
     return {
       tokens,
       matchedWords
     };
   }

   // an empty word would produce zero-length matches, which never
   // advance a global regexp and would make the loop below spin forever
   const searchWords = words.filter(word => word.length > 0);

   if (!searchWords.length) {
     tokens.push({
       value: string,
       index: 0
     });

     return {
       tokens,
       matchedWords
     };
   }

   // two capture groups per word: the word at a word boundary, then the
   // word anywhere. The groups are intentionally unnamed: repeating a
   // group name for every word is a syntax error in engines that lack
   // support for duplicate named groups. The index of the group that
   // took part in a match identifies both the word and the variant.
   const regexpString = searchWords
     .map(escapeRegexp)
     .flatMap(str => [ '(\\b' + str + ')', '(' + str + ')' ])
     .join('|');

   const regexp = new RegExp(regexpString, 'ig');

   let match;
   let lastIndex = 0;

   while ((match = regexp.exec(string))) {

     const [ value ] = match;

     // zero-based offset of the capture group that matched;
     // even offsets are the word boundary variants
     const groupOffset = match.findIndex((group, idx) => idx > 0 && group !== undefined) - 1;

     const word = searchWords[ Math.floor(groupOffset / 2) ];

     if (match.index > lastIndex) {

       // add previous token (NO match)
       tokens.push({
         value: string.slice(lastIndex, match.index),
         index: lastIndex
       });
     }

     // add current token (match)
     tokens.push({
       value,
       index: match.index,
       match: true,
       wordStart: groupOffset % 2 === 0,
       start: match.index === 0
     });

     // record the search word rather than the matched text, as case
     // insensitive matching may accept text that lower-cases differently
     matchedWords[ word.toLowerCase() ] = true;

     lastIndex = match.index + value.length;
   }

   // add after token (NO match)
   if (lastIndex < string.length) {
     tokens.push({
       value: string.slice(lastIndex),
       index: lastIndex
     });
   }

   return {
     tokens,
     matchedWords
   };
 }

 /**
  * Escape all characters with a special meaning in regular expressions,
  * so that the string can be embedded into a pattern as literal text.
  *
  * @param {string} string
  *
  * @return {string}
  */
 function escapeRegexp(string) {
   return string.replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&');
 }