Skip to content

Commit

Permalink
Feat/return-filename-with-text-snippets (#7373)
Browse files Browse the repository at this point in the history
* index filename with fulltext object

* return filename on elasticsearch query

* add migrations to apply reindex
  • Loading branch information
Joao-vi authored Oct 17, 2024
1 parent 45739dd commit 2c5c501
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { Db } from 'mongodb';

/**
 * Migration 169: flags that a reindex is required so that the document
 * filename is stored with the fullText object in elasticsearch (see
 * `description`). The `up` step itself makes no database changes.
 */
export default {
// Sequence number of this migration.
delta: 169,

name: 'reindex_persist_filename_with_fullText_object',

description:
"We're now indexing document.filename within the fullText object on elasticsearch, this is usefull because on search/v2 endpoint we need to return which filename each text snippet belongs to.",

// Marks the migration as needing a reindex — presumably read by the migration runner; confirm against the runner.
reindex: true,

/**
 * Writes the migration name to stdout and does nothing else; `db` is
 * accepted but not used, hence the lint suppression below.
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
async up(db: Db) {
process.stdout.write(`${this.name}...\r\n`);
},
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { Db } from 'mongodb';

import testingDB from 'api/utils/testing_db';
import migration from '../index';
import { Fixture } from '../types';
import { fixtures } from './fixtures';

// Handle to the test database; assigned inside initTest.
let db: Db | null;

/**
 * Loads the given fixture through testingDB, stores the resulting mongodb
 * handle in `db`, and runs the migration's `up` step against it.
 */
const initTest = async (fixture: Fixture) => {
await testingDB.setupFixturesAndContext(fixture);
// Non-null assertion: assumes setupFixturesAndContext has populated testingDB.mongodb — TODO confirm.
db = testingDB.mongodb!;
await migration.up(db);
};

// Stub process.stdout.write (always returning true) so the migration's
// progress line is not printed while the spec runs.
beforeAll(async () => {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
jest.spyOn(process.stdout, 'write').mockImplementation((str: string | Uint8Array) => true);
});

// Tear down the testing database once every test in this file has run.
// NOTE(review): the stdout spy above is not restored in this file — confirm
// the Jest config restores mocks, or restore it explicitly.
afterAll(async () => {
await testingDB.tearDown();
});

// Spec for the migration: runs it once against the fixtures, then checks the
// static metadata it exposes.
describe('migration test', () => {
// Load fixtures and execute migration.up a single time for the whole suite.
beforeAll(async () => {
await initTest(fixtures);
});

// Guards against the delta being changed accidentally.
it('should have a delta number', () => {
expect(migration.delta).toBe(169);
});

// The migration must advertise that a reindex is required.
it('should check if a reindex is needed', async () => {
expect(migration.reindex).toBe(true);
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { ObjectId } from 'mongodb';
import { Fixture } from '../types';

/**
 * Fixture data for the migration spec: a single entity with a freshly
 * generated id and the title 'test_doc'.
 */
export const fixtures: Fixture = {
  entities: [{ _id: new ObjectId(), title: 'test_doc' }],
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { ObjectId } from 'mongodb';

/**
 * Minimal entity shape used by this migration's fixtures: an id, a title,
 * and any number of additional properties of arbitrary type.
 */
interface Entity {
  _id: ObjectId;
  title: string;
  // `unknown` already includes `undefined`, so the union with it was redundant.
  [k: string]: unknown;
}

/** Shape of the fixture data for this migration's spec: a list of entities. */
interface Fixture {
  entities: Entity[];
}

export type { Entity, Fixture };
4 changes: 3 additions & 1 deletion app/api/search.v2/buildQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ const fullTextSearch = (
type: 'fullText',
score_mode: 'max',
inner_hits: {
_source: false,
_source: {
excludes: ['fullText*'],
},
...snippetsHighlight(query, [{ 'fullText_*': {} }]),
},
query: {
Expand Down
5 changes: 3 additions & 2 deletions app/api/search.v2/searchResponse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ function getSnippetsForNonFullText(hit: ElasticHit<EntitySchema>) {
}

function extractFullTextSnippets(hit: ElasticHit<EntitySchema>) {
const fullTextSnippets: { text: string; page: number }[] = [];
const fullTextSnippets: { text: string; page: number; filename: string }[] = [];

if (hit.inner_hits && hit.inner_hits.fullText.hits.hits[0]?.highlight) {
const { highlight } = hit.inner_hits.fullText.hits.hits[0];
const { highlight, _source } = hit.inner_hits.fullText.hits.hits[0];
const regex = /\[{2}(\d+)]{2}/g;

Object.values<string[]>(highlight).forEach(snippets => {
Expand All @@ -24,6 +24,7 @@ function extractFullTextSnippets(hit: ElasticHit<EntitySchema>) {
fullTextSnippets.push({
text: snippet.replace(regex, ''),
page: matches ? Number(matches[1]) : 0,
filename: _source.filename,
});
});
});
Expand Down
12 changes: 9 additions & 3 deletions app/api/search.v2/specs/snippetsSearch.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ describe('searchSnippets', () => {
count: 2,
metadata: [],
fullText: [
{ page: 2, text: matches },
{ page: 4, text: matches },
{ page: 2, text: matches, filename: 'entity1SharedId.pdf' },
{ page: 4, text: matches, filename: 'entity1SharedId.pdf' },
],
},
}),
Expand Down Expand Up @@ -97,7 +97,13 @@ describe('searchSnippets', () => {
snippets: {
count: 1,
metadata: [],
fullText: [{ page: 2, text: expect.stringContaining('<b>searched:term</b>') }],
fullText: [
{
page: 2,
text: expect.stringContaining('<b>searched:term</b>'),
filename: 'entity4SharedId.pdf',
},
],
},
}),
];
Expand Down
2 changes: 1 addition & 1 deletion app/api/search/elasticTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export interface ElasticHit<T> {
fields?: any;
highlight?: any;
// eslint-disable-next-line camelcase
inner_hits?: { fullText: { hits: { hits: [{ highlight: {} }] } } };
inner_hits?: { fullText: { hits: { hits: [{ highlight: {}; _source: Record<string, any> }] } } };
// eslint-disable-next-line camelcase
matched_queries?: string[];
sort?: string[];
Expand Down
1 change: 1 addition & 0 deletions app/api/search/entitiesIndex.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ function setFullTextSettings(defaultDocument, id, body, doc) {
}
const fullTextObject = {
[`fullText_${language}`]: fullText,
filename: defaultDocument.filename,
fullText: { name: 'fullText', parent: id },
};
body.push(fullTextObject);
Expand Down

0 comments on commit 2c5c501

Please sign in to comment.