Skip to content

Commit

Permalink
feat(search): find search terms across all keys
Browse files Browse the repository at this point in the history
This ensures we properly handle fuzzy results (again),
where parts of the search are matched across different
keys.

This ensures we're not overly strict in filtering things.

Related to bpmn-io/bpmn-js#2235
  • Loading branch information
nikku committed Nov 1, 2024
1 parent 7b966ce commit 1e47d13
Show file tree
Hide file tree
Showing 2 changed files with 239 additions and 111 deletions.
229 changes: 129 additions & 100 deletions lib/features/search/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,12 @@ export default function search(items, pattern, options) {
keys
} = options;

// drop leading and trailing whitespace
pattern = pattern.trim();
const words = pattern.trim().toLowerCase().split(/\s+/);

return items.flatMap((item, idx) => {
const {
__matches: matches,
...tokens
} = getTokens(item, pattern, keys);
const tokens = matchItem(item, words, keys);

if (!matches) {
if (!tokens) {
return [];
}

Expand All @@ -53,26 +49,48 @@ export default function search(items, pattern, options) {
}

/**
* Get tokens for item.
* Match an item and return tokens in case of a match.
*
* @param {Object} item
* @param {string} pattern
* @param {string[]} words
* @param {string[]} keys
*
* @returns {Record<string, Tokens>}
*/
function getTokens(item, pattern, keys) {
return keys.reduce((results, key) => {
function matchItem(item, words, keys) {

const {
matchedWords,
tokens
} = keys.reduce((result, key) => {
const string = item[ key ];

const tokens = getMatchingTokens(string, pattern);
const {
tokens,
matchedWords
} = matchString(string, words);

return {
...results,
[ key ]: tokens,
__matches: results.__matches || hasMatch(tokens)
tokens: {
...result.tokens,
[ key ]: tokens,
},
matchedWords: {
...result.matchedWords,
...matchedWords
}
};
}, { });
}, {
matchedWords: {},
tokens: {}
});

// only return result if every word got matched
if (Object.keys(matchedWords).length !== words.length) {
return null;
}

return tokens;
}

/**
Expand Down Expand Up @@ -118,119 +136,130 @@ function createResultSorter(keys) {
}

/**
* @param {Token} token
*
* @return {boolean}
*/
export function isMatch(token) {
return token.match;
* Compares two token arrays.
*
* @param {Token[]} [tokensA]
* @param {Token[]} [tokensB]
*
* @returns {number}
*/
function compareTokens(tokensA, tokensB) {

  // score B before A, then subtract A from B, so that the higher
  // scoring token array yields a negative result for (A, B) when A wins
  const scoreB = scoreTokens(tokensB);
  const scoreA = scoreTokens(tokensA);

  return scoreB - scoreA;
}

/**
* @param {Token[]} tokens
*
* @return {boolean}
*/
export function hasMatch(tokens) {
return tokens.find(isMatch);
* @param { Token[] } tokens
* @returns { number }
*/
function scoreTokens(tokens) {

  // compareTokens documents both token arrays as optional; a missing
  // array has nothing that matched and therefore scores zero
  if (!tokens) {
    return 0;
  }

  // sum up the individual token scores
  return tokens.reduce((sum, token) => sum + scoreToken(token), 0);
}

/**
* Compares two token arrays.
*
* @param {Token[]} [tokensA]
* @param {Token[]} [tokensB]
*
* @returns {number}
*/
export function compareTokens(tokensA, tokensB) {

const tokensAHasMatch = tokensA && hasMatch(tokensA),
tokensBHasMatch = tokensB && hasMatch(tokensB);

if (tokensAHasMatch && !tokensBHasMatch) {
return -1;
}

if (!tokensAHasMatch && tokensBHasMatch) {
return 1;
}

if (!tokensAHasMatch && !tokensBHasMatch) {
* Score a token.
*
* @param { Token } token
*
* @returns { number }
*/
function scoreToken(token) {
if (!token.match) {
return 0;
}

const tokensAFirstMatch = tokensA.find(isMatch),
tokensBFirstMatch = tokensB.find(isMatch);

if (tokensAFirstMatch.index < tokensBFirstMatch.index) {
return -1;
}

if (tokensAFirstMatch.index > tokensBFirstMatch.index) {
return 1;
}

return 0;
return token.start
? 1.37
: token.wordStart
? 1.13
: 1;
}

/**
* Compares two strings.
*
* @param {string} [a = '']
* @param {string} [b = '']
*
* @returns {number}
*/
export function compareStrings(a = '', b = '') {
* Compares two strings.
*
* @param {string} [a = '']
* @param {string} [b = '']
*
* @returns {number}
*/
function compareStrings(a = '', b = '') {

  // locale-aware ordering; an undefined argument falls back to the
  // empty string through the parameter defaults
  const order = a.localeCompare(b);

  return order;
}

/**
* @param {string} string
* @param {string} pattern
*
* @return {Token[]}
*/
export function getMatchingTokens(string, pattern) {
var tokens = [],
originalString = string;
* Match a given string against a set of words,
* and return the result.
*
* @param {string} string
* @param {string[]} words
*
* @return { {
* tokens: Token[],
* matchedWords: Record<string, boolean>
* } }
*/
function matchString(string, words) {

if (!string) {
return tokens;
return {
tokens: [],
matchedWords: {}
};
}

string = string.toLowerCase();
pattern = pattern.toLowerCase();
const tokens = [];
const matchedWords = {};

const regexpString = words.map(escapeRegexp).flatMap(str => [ '(?<wordStart>\\b' + str + ')', str ]).join('|');

const regexp = new RegExp(regexpString, 'ig');

let match;
let lastIndex = 0;

var index = string.indexOf(pattern);
while ((match = regexp.exec(string))) {

if (index > -1) {
if (index !== 0) {
tokens.push({
value: originalString.slice(0, index),
index: 0
});
const [ value ] = match;

if (match.index > lastIndex) {

// add previous token (NO match)
if (match.index !== 0) {
tokens.push({
value: string.slice(lastIndex, match.index),
index: lastIndex
});
}
}

// add current token (match)
tokens.push({
value: originalString.slice(index, index + pattern.length),
index: index,
match: true
value,
index: match.index,
match: true,
wordStart: !!match.groups.wordStart,
start: match.index === 0
});

if (pattern.length + index < string.length) {
tokens.push({
value: originalString.slice(index + pattern.length),
index: index + pattern.length
});
}
} else {
matchedWords[value.toLowerCase()] = true;

lastIndex = match.index + value.length;
}

// add after token (NO match)
if (lastIndex < string.length) {
tokens.push({
value: originalString,
index: 0
value: string.slice(lastIndex),
index: lastIndex
});
}

return tokens;
return {
tokens,
matchedWords
};
}

function escapeRegexp(string) {

  // characters that carry special meaning inside a regular expression
  const specialCharacters = /[/\-\\^$*+?.()|[\]{}]/g;

  // prefix each of them with a backslash so they are matched literally
  return string.replace(specialCharacters, (character) => '\\' + character);
}
Loading

0 comments on commit 1e47d13

Please sign in to comment.