feat(search): match fuzzy
This ensures we properly handle fuzzy results (again), i.e. when
providing multiple search words, return results that match on a
best-effort basis, rather than being overly strict:

items = [ 'foo', 'bar' ]
search = 'foo bar'
result = [ 'foo', 'bar' ]

The tokens allow folks to make sense of which parts of a result
actually matched, and to refine the search as they see fit.

Along with this, results are still scored, i.e. the most relevant
results are shown to users first.
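
For illustration (not part of the original commit message), in terms of
the search API exercised by the specs below, this roughly means:

    items   = [ { title: 'foo' }, { title: 'bar' } ]
    results = search(items, 'foo bar', { keys: [ 'title' ] })

    // => both items are returned, ordered by score, each result
    //    carrying the tokens that describe its matched parts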

Related to bpmn-io/bpmn-js#2235
nikku committed Oct 31, 2024
1 parent 043051b commit 7dc9c9b
Showing 2 changed files with 207 additions and 65 deletions.
125 changes: 72 additions & 53 deletions lib/features/search/search.js
@@ -144,44 +144,46 @@ export function hasMatch(tokens) {
  * @returns {number}
  */
 export function compareTokens(tokensA, tokensB) {
-
-  const tokensAHasMatch = tokensA && hasMatch(tokensA),
-        tokensBHasMatch = tokensB && hasMatch(tokensB);
-
-  if (tokensAHasMatch && !tokensBHasMatch) {
-    return -1;
-  }
-
-  if (!tokensAHasMatch && tokensBHasMatch) {
-    return 1;
-  }
-
-  if (!tokensAHasMatch && !tokensBHasMatch) {
-    return 0;
-  }
-
-  const tokensAFirstMatch = tokensA.find(isMatch),
-        tokensBFirstMatch = tokensB.find(isMatch);
-
-  if (tokensAFirstMatch.index < tokensBFirstMatch.index) {
-    return -1;
-  }
-
-  if (tokensAFirstMatch.index > tokensBFirstMatch.index) {
-    return 1;
-  }
-
-  return 0;
+  return scoreTokens(tokensB) - scoreTokens(tokensA);
+}
+
+/**
+ * @param { Token[] } tokens
+ * @returns { number }
+ */
+function scoreTokens(tokens) {
+  return tokens.reduce((sum, token) => sum + scoreToken(token), 0);
+}
+
+/**
+ * Score a token.
+ *
+ * @param { Token } token
+ *
+ * @returns { number }
+ */
+function scoreToken(token) {
+  if (!token.match) {
+    return 0;
+  }
+
+  const modifier = token.start
+    ? 1.37
+    : token.wordStart
+      ? 1.13
+      : 1;
+
+  return token.value.length * modifier;
 }
 
 /**
  * Compares two strings.
  *
  * @param {string} [a = '']
  * @param {string} [b = '']
  *
  * @returns {number}
  */
 export function compareStrings(a = '', b = '') {
   return a.localeCompare(b);
 }
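
For intuition, a small worked example (illustration only, not part of the
diff): per scoreToken above, a three-character match scores highest at the
start of the string, lower at a word start, and lowest inside a word:

    scoreToken({ value: 'foo', match: true, start: true });     // 3 * 1.37 = 4.11
    scoreToken({ value: 'foo', match: true, wordStart: true }); // 3 * 1.13 = 3.39
    scoreToken({ value: 'foo', match: true });                  // 3 * 1    = 3

Since compareTokens returns scoreTokens(tokensB) - scoreTokens(tokensA),
token lists with the higher total score sort first.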
@@ -193,44 +195,61 @@ export function compareStrings(a = '', b = '') {
  * @return {Token[]}
  */
 export function getMatchingTokens(string, pattern) {
-  var tokens = [],
-      originalString = string;
 
   if (!string) {
-    return tokens;
+    return [];
   }
 
-  string = string.toLowerCase();
-  pattern = pattern.toLowerCase();
-
-  var index = string.indexOf(pattern);
-
-  if (index > -1) {
-    if (index !== 0) {
-      tokens.push({
-        value: originalString.slice(0, index),
-        index: 0
-      });
-    }
-
-    tokens.push({
-      value: originalString.slice(index, index + pattern.length),
-      index: index,
-      match: true
-    });
-
-    if (pattern.length + index < string.length) {
-      tokens.push({
-        value: originalString.slice(index + pattern.length),
-        index: index + pattern.length
-      });
-    }
-  } else {
-    tokens.push({
-      value: originalString,
-      index: 0
-    });
-  }
+  const tokens = [];
+
+  const regexpString = [
+    pattern,
+    ...pattern.split(/\s+/).filter(s => s.length > 1)
+  ].map(escapeRegexp).flatMap(str => [ '(?<wordStart>\\b' + str + ')', str ]).join('|');
+
+  const regexp = new RegExp(regexpString, 'ig');
+
+  let match;
+  let lastIndex = 0;
+
+  while ((match = regexp.exec(string))) {
+
+    const [ value ] = match;
+
+    if (match.index > lastIndex) {
+
+      // add previous token (NO match)
+      if (match.index !== 0) {
+        tokens.push({
+          value: string.slice(lastIndex, match.index),
+          index: lastIndex
+        });
+      }
+    }
+
+    // add current token (match)
+    tokens.push({
+      value,
+      index: match.index,
+      match: true,
+      wordStart: !!match.groups.wordStart,
+      start: match.index === 0
+    });
+
+    lastIndex = match.index + value.length;
+  }
+
+  // add after token (NO match)
+  if (lastIndex < string.length) {
+    tokens.push({
+      value: string.slice(lastIndex),
+      index: lastIndex
+    });
+  }
 
   return tokens;
 }
+
+function escapeRegexp(string) {
+  return string.replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&');
+}
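
To illustrate the new tokenization (example only, not part of the commit):
for the pattern 'foo woop' the built regular expression is roughly

    (?<wordStart>\bfoo woop)|foo woop|(?<wordStart>\bfoo)|foo|(?<wordStart>\bwoop)|woop

i.e. it tries the full pattern first, then the individual words, each
alternative optionally anchored at a word boundary. (The repeated
(?<wordStart>...) group relies on duplicate named capture groups, a fairly
recent JavaScript regex feature.) Matching against 'foobar woop' then
yields:

    getMatchingTokens('foobar woop', 'foo woop');

    // => [
    //   { value: 'foo', index: 0, match: true, wordStart: true, start: true },
    //   { value: 'bar ', index: 3 },
    //   { value: 'woop', index: 7, match: true, wordStart: true, start: false }
    // ]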
147 changes: 135 additions & 12 deletions test/spec/features/search/searchSpec.js
@@ -15,6 +15,31 @@ describe('search', function() {
   }));
 
 
+  it('should search simple', inject(function(search) {
+
+    // given
+    const items = [
+      {
+        title: 'foo',
+        description: 'woop'
+      },
+      {
+        title: 'foobar'
+      }
+    ];
+
+    const searchItems = (items, term) => search(items, term, {
+      keys: [
+        'title',
+        'description'
+      ]
+    });
+
+    // then
+    expect(searchItems(items, 'foo')).to.have.length(2);
+    expect(searchItems(items, 'bar')).to.have.length(1);
+    expect(searchItems(items, 'other')).to.have.length(0);
+  }));
 
 
   describe('result', function() {
@@ -75,6 +100,8 @@ describe('search', function() {
     }));
 
   });
+
+
   it('should search complex', inject(function(search) {
 
     // given
@@ -156,7 +183,7 @@ describe('search', function() {
   }));
 
 
-  it('should sort by match location', inject(function(search) {
+  it('should prioritize start of word', inject(function(search) {
 
     // given
     const items = [
@@ -185,8 +212,34 @@ describe('search', function() {
 
     // then
     expect(results).to.have.length(3);
     expect(results[0].item).to.eql(items[1]);
-    expect(results[1].item).to.eql(items[2]);
-    expect(results[2].item).to.eql(items[0]);
+    expect(results[1].item).to.eql(items[0]);
+    expect(results[2].item).to.eql(items[2]);
   }));
+
+
+  it('should prioritize start of term', inject(function(search) {
+
+    // given
+    const items = [
+      {
+        title: 'yes barfoo'
+      },
+      {
+        title: 'yes foowoo'
+      }
+    ];
+
+    // when
+    const results = search(items, 'foo', {
+      keys: [
+        'title'
+      ]
+    });
+
+    // then
+    expect(results).to.have.length(2);
+    expect(results[0].item).to.eql(items[1]);
+    expect(results[1].item).to.eql(items[0]);
+  }));
 
@@ -288,29 +341,99 @@ describe('search', function() {
   }));
 
 
-  it('should match partial tokens', inject(function(search) {
+  it('should match case insensitive', inject(function(search) {
 
     // given
     const items = [
       {
-        title: 'baz',
-        description: 'baz'
-      },
-      {
-        title: 'Kaboom'
-      },
-      {
-        title: 'Kafka message',
-        description: 'Nope'
+        title: 'KAFKAF'
       }
     ];
 
     // when
-    const results = search(items, 'Kaf mess', {
+    const results = search(items, 'kaf', {
+      keys: [
+        'title'
+      ]
+    });
+
+    // then
+    expect(results).to.have.length(1);
+    expect(results[0].item).to.eql(items[0]);
+  }));
+
+
+  it('should match partial tokens', inject(function(search) {
+
+    // given
+    const items = [
+      {
+        title: 'Kafka amess',
+        description: 'Nope'
+      },
+      {
+        title: 'mess'
+      }
+    ];
+
+    // when
+    const results = search(items, 'Kaf mess', {
+      keys: [
+        'title',
+        'description',
+        'search'
+      ]
+    });
+
+    // then
+    expect(results).to.have.length(2);
+    expect(results[0].item).to.eql(items[0]);
+    expect(results[1].item).to.eql(items[1]);
+  }));
+
+
+  it('should prioritize longest match', inject(function(search) {
+
+    // given
+    const items = [
+      {
+        title: 'baz'
+      },
+      {
+        title: 'baba'
+      }
+    ];
+
+    // when
+    const results = search(items, 'baz baba', {
+      keys: [
+        'title',
+        'description',
+        'search'
+      ]
+    });
+
+    // then
+    expect(results).to.have.length(2);
+    expect(results[0].item).to.eql(items[1]);
+    expect(results[1].item).to.eql(items[0]);
+  }));
+
+
+  it('should match with spaces', inject(function(search) {
+
+    // given
+    const items = [
+      {
+        title: 'bar foo bar'
+      },
+      {
+        title: 'other bar foo'
+      }
+    ];
+
+    // when
+    const results = search(items, 'foo bar', {
       keys: [
         'title',
         'description',
@@ -320,7 +443,7 @@ describe('search', function() {
 
     // then
     expect(results).to.have.length(2);
-    expect(results[0].item).to.eql(items[3]);
+    expect(results[0].item).to.eql(items[0]);
     expect(results[1].item).to.eql(items[1]);
   }));
 
