From 9b0fc2226b930ea05e41ed2419dce837b3c02688 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 28 Mar 2024 12:54:33 +0100 Subject: [PATCH 01/30] add Elasticsearch annotated-text plugin --- DockerfileElastic | 3 +++ docker-compose.yaml | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 DockerfileElastic diff --git a/DockerfileElastic b/DockerfileElastic new file mode 100644 index 000000000..2b908c5fb --- /dev/null +++ b/DockerfileElastic @@ -0,0 +1,3 @@ +FROM docker.elastic.co/elasticsearch/elasticsearch:8.10.2 + +RUN bin/elasticsearch-plugin install mapper-annotated-text diff --git a/docker-compose.yaml b/docker-compose.yaml index 9277df751..f3f4d63b3 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -52,7 +52,9 @@ services: target: /frontend/build command: sh -c "yarn prebuild && yarn start-docker" elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.10.2 + build: + context: . + dockerfile: DockerfileElastic environment: - node.name=ianalyzer-node - discovery.type=single-node From ffdfe30f584cf2a262439c51c8ca05d25d8d70d6 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 28 Mar 2024 12:54:51 +0100 Subject: [PATCH 02/30] extend FoundDocument with entities --- frontend/src/_utilities.scss | 5 +++ .../document-view.component.html | 1 + .../document-view/document-view.component.ts | 2 +- .../document-preview.component.html | 8 ++-- .../document-preview.component.scss | 3 ++ frontend/src/app/document/document.module.ts | 2 + .../entity-legend.component.html | 3 ++ .../entity-legend.component.scss | 7 +++ .../entity-legend.component.spec.ts | 25 +++++++++++ .../entity-legend/entity-legend.component.ts | 21 +++++++++ .../src/app/models/found-document.spec.ts | 44 ++++++++++++++----- frontend/src/app/models/found-document.ts | 21 +++++++-- frontend/src/app/models/search-results.ts | 5 +++ .../src/app/services/download.service.spec.ts | 2 - .../app/services/elastic-search.service.ts | 11 +++-- frontend/src/app/services/entity.service.ts | 18 ++++++++ .../src/app/services/search.service.spec.ts | 2 - frontend/src/mock-data/constructor-helpers.ts | 4 +- frontend/src/mock-data/elastic-search.ts | 2 +- frontend/src/mock-data/search.ts | 5 ++- frontend/src/styles.scss | 13 ++++++ 21 files changed, 175 insertions(+), 29 deletions(-) create mode 100644 frontend/src/app/document/entity-legend/entity-legend.component.html create mode 100644 frontend/src/app/document/entity-legend/entity-legend.component.scss create mode 100644 frontend/src/app/document/entity-legend/entity-legend.component.spec.ts create mode 100644 frontend/src/app/document/entity-legend/entity-legend.component.ts create mode 100644 frontend/src/app/services/entity.service.ts diff --git a/frontend/src/_utilities.scss b/frontend/src/_utilities.scss index 225735d40..15315baae 100644 --- a/frontend/src/_utilities.scss +++ b/frontend/src/_utilities.scss @@ -30,6 +30,11 @@ $section-padding: 3rem 1.5rem; $boxShadow: 0 2px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px rgba(10, 10, 10, 0.1); $boxShadowHover: 0px 5px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px $primary; +$entity-person: #88CCEE; +$entity-location:#44AA99; +$entity-organization: #DDCC77; +$entity-miscellaneous: #AA4499; + @import "~bulma/sass/utilities/_all"; // based on the Bulma loader diff --git a/frontend/src/app/document-view/document-view.component.html b/frontend/src/app/document-view/document-view.component.html index e3fa71a3d..dbfef7c9b 100644 --- a/frontend/src/app/document-view/document-view.component.html +++ b/frontend/src/app/document-view/document-view.component.html @@ -42,6 +42,7 @@ [attr.lang]="document.language(field)" [innerHtml]="formatInnerHtml(field)"> + diff --git a/frontend/src/app/document-view/document-view.component.ts b/frontend/src/app/document-view/document-view.component.ts index 3452d4906..b13790d2f 100644 --- a/frontend/src/app/document-view/document-view.component.ts +++ b/frontend/src/app/document-view/document-view.component.ts @@ -4,7 +4,6 @@ import { CorpusField, FoundDocument, Corpus, QueryModel } from '../models/index' import { DocumentView } from '../models/document-page'; import * as _ from 'lodash'; import { documentIcons } from '../shared/icons'; -import { findByName } from '../utils/utils'; @Component({ selector: 'ia-document-view', @@ -53,6 +52,7 @@ export class DocumentViewComponent implements OnChanges { ngOnChanges(changes: SimpleChanges): void { if (changes.view) { this.activeTab = this.tabFromView(this.view); + this.document.fetchEntities(); } } diff --git a/frontend/src/app/document/document-preview/document-preview.component.html b/frontend/src/app/document/document-preview/document-preview.component.html index 87429833f..dba6cf8ad 100644 --- a/frontend/src/app/document/document-preview/document-preview.component.html +++ b/frontend/src/app/document/document-preview/document-preview.component.html @@ -1,4 +1,7 @@ +
+ Relevance: +
@@ -24,10 +27,9 @@
-
- Relevance: + -
- + - + @@ -40,7 +40,7 @@
+ [innerHtml]="field | elasticsearchHighlight:document">
diff --git a/frontend/src/app/document-view/document-view.component.ts b/frontend/src/app/document-view/document-view.component.ts index b13790d2f..1f5171d79 100644 --- a/frontend/src/app/document-view/document-view.component.ts +++ b/frontend/src/app/document-view/document-view.component.ts @@ -76,62 +76,4 @@ export class DocumentViewComponent implements OnChanges { return field.mappingType === 'geo_point'; } - displayGeoPointField(field: CorpusField) { - let latitude = this.document.fieldValue(field)[field.name][1]; - let longitude = this.document.fieldValue(field)[field.name][0]; - // Round to 2 decimal places - latitude = Math.round(latitude * 100) / 100; - longitude = Math.round(longitude * 100) / 100; - return `Lat: ${latitude}; Lon: ${longitude}`; - } - - /** - * Checks if user has selected fields in the queryModel and whether current field is among them - * Used to check which fields need to be highlighted - */ - selectedFieldsContain(field: CorpusField) { - if (this.queryModel && this.queryModel.searchFields && this.queryModel.searchFields.includes(field)) { - return true; - } else if (this.queryModel && !this.queryModel.searchFields) { - return true; // if there are no selected fields, return true for all fields - } else { - return false; - } - } - - stripTags(htmlString: string){ - const parseHTML= new DOMParser().parseFromString(htmlString, 'text/html'); - return parseHTML.body.textContent || ''; - } - - formatInnerHtml(field: CorpusField) { - const fieldValue = this.document.fieldValues[field.name]; - - if (_.isEmpty(fieldValue)) { - return; - } - - const highlighted = this.highlightedInnerHtml(field); - return this.addParagraphTags(highlighted); - } - - - highlightedInnerHtml(field: CorpusField) { - let highlighted = this.document.fieldValues[field.name]; - if (this.document.highlight && this.document.highlight.hasOwnProperty(field.name) && - this.selectedFieldsContain(field)) { // only apply highlights to selected search fields - for (const highlight of this.document.highlight[field.name]) { - const stripped_highlight = this.stripTags(highlight); - highlighted = highlighted.replace(stripped_highlight, highlight); - } - return highlighted; - } else { - return this.document.fieldValues[field.name]; - } - } - - addParagraphTags(content: string | string[]) { - const paragraphs = typeof content === 'string' ? content.split('\n') : content; - return paragraphs.map(p => `

${p}

`).join(' '); - } } diff --git a/frontend/src/app/document/document-preview/document-preview.component.html b/frontend/src/app/document/document-preview/document-preview.component.html index dba6cf8ad..ed4e8a2bc 100644 --- a/frontend/src/app/document/document-preview/document-preview.component.html +++ b/frontend/src/app/document/document-preview/document-preview.component.html @@ -18,7 +18,7 @@ diff --git a/frontend/src/app/document/document.module.ts b/frontend/src/app/document/document.module.ts index 62f5ed5f9..6802c8c8a 100644 --- a/frontend/src/app/document/document.module.ts +++ b/frontend/src/app/document/document.module.ts @@ -10,8 +10,7 @@ import { DocumentPopupComponent } from './document-popup/document-popup.componen import { DialogModule } from 'primeng/dialog'; import { DocumentPreviewComponent } from './document-preview/document-preview.component'; import { EntityLegendComponent } from './entity-legend/entity-legend.component'; - - +import { ElasticsearchHighlightPipe, GeoDataPipe, SnippetPipe } from '../pipes'; @NgModule({ declarations: [ @@ -21,6 +20,9 @@ import { EntityLegendComponent } from './entity-legend/entity-legend.component'; DocumentPopupComponent, DocumentPreviewComponent, EntityLegendComponent, + ElasticsearchHighlightPipe, + GeoDataPipe, + SnippetPipe ], imports: [ DialogModule, diff --git a/frontend/src/app/manual/manual-navigation.component.html b/frontend/src/app/manual/manual-navigation.component.html index 85a3e1183..6f2055ef7 100644 --- a/frontend/src/app/manual/manual-navigation.component.html +++ b/frontend/src/app/manual/manual-navigation.component.html @@ -16,7 +16,7 @@ - + diff --git a/frontend/src/app/manual/manual.module.ts b/frontend/src/app/manual/manual.module.ts index ac0d5b3fe..fc13467d9 100644 --- a/frontend/src/app/manual/manual.module.ts +++ b/frontend/src/app/manual/manual.module.ts @@ -4,7 +4,7 @@ import { ManualNavigationComponent } from './manual-navigation.component'; import { ManualComponent } from './manual.component'; import { AboutComponent } from '../about/about.component'; import { PrivacyComponent } from '../privacy/privacy.component'; - +import { RegexHighlightPipe } from '../pipes'; @NgModule({ @@ -13,6 +13,7 @@ import { PrivacyComponent } from '../privacy/privacy.component'; ManualComponent, ManualNavigationComponent, PrivacyComponent, + RegexHighlightPipe ], imports: [ SharedModule diff --git a/frontend/src/app/pipes/elasticsearch-highlight.pipe.ts b/frontend/src/app/pipes/elasticsearch-highlight.pipe.ts new file mode 100644 index 000000000..3a35ae3cb --- /dev/null +++ b/frontend/src/app/pipes/elasticsearch-highlight.pipe.ts @@ -0,0 +1,51 @@ +import { Pipe, PipeTransform } from '@angular/core'; +import { DomSanitizer } from '@angular/platform-browser'; +import { CorpusField, FoundDocument } from '../models'; +import * as _ from 'lodash'; + +@Pipe({ + name: 'elasticsearchHighlight' +}) +export class ElasticsearchHighlightPipe implements PipeTransform { + constructor(private sanitizer: DomSanitizer) { + } + + /** + * Transforms a text to display highlights fetched from Elasticsearch + * + * @param document a FoundDocument, containing the fetched highlights + */ + transform(field: CorpusField, document: FoundDocument) { + const fieldValue = document.fieldValues[field.name]; + + if (_.isEmpty(fieldValue)) { + return; + } + + const highlighted = this.highlightedInnerHtml(field, document); + const paragraphs = this.addParagraphTags(highlighted); + return this.sanitizer.bypassSecurityTrustHtml(paragraphs); + } + + highlightedInnerHtml(field: CorpusField, document: FoundDocument) { + let highlighted = document.fieldValues[field.name]; + if (document.highlight && document.highlight.hasOwnProperty(field.name)) { + for (const highlight of document.highlight[field.name]) { + const strippedHighlight = this.stripTags(highlight); + highlighted = highlighted.replace(strippedHighlight, highlight); + } + } + return highlighted; + } + + addParagraphTags(content: string | string[]) { + const paragraphs = typeof content === 'string' ? content.split('\n') : content; + return paragraphs.map(p => `

${p}

`).join(' '); + } + + stripTags(htmlString: string){ + const parseHTML= new DOMParser().parseFromString(htmlString, 'text/html'); + return parseHTML.body.textContent || ''; + } + +} diff --git a/frontend/src/app/pipes/geo-data.pipe.ts b/frontend/src/app/pipes/geo-data.pipe.ts new file mode 100644 index 000000000..cf7b18a23 --- /dev/null +++ b/frontend/src/app/pipes/geo-data.pipe.ts @@ -0,0 +1,25 @@ +import { Pipe, PipeTransform } from '@angular/core'; +import { DomSanitizer } from '@angular/platform-browser'; +import { CorpusField, FoundDocument } from '../models'; +@Pipe({ + name: 'geoData' +}) +export class GeoDataPipe implements PipeTransform { + constructor(private sanitizer: DomSanitizer) { + } + + /** + * Transforms GeoJSON data + * + * @param document FoundDocument holding the actual data + */ + transform(field: CorpusField, document: FoundDocument) { + let latitude = document.fieldValue(field)[field.name][1]; + let longitude = document.fieldValue(field)[field.name][0]; + // Round to 2 decimal places + latitude = Math.round(latitude * 100) / 100; + longitude = Math.round(longitude * 100) / 100; + return `Lat: ${latitude}; Lon: ${longitude}`; + } + +} diff --git a/frontend/src/app/pipes/index.ts b/frontend/src/app/pipes/index.ts new file mode 100644 index 000000000..a20091083 --- /dev/null +++ b/frontend/src/app/pipes/index.ts @@ -0,0 +1,4 @@ +export * from './elasticsearch-highlight.pipe'; +export * from './geo-data.pipe'; +export * from './regex-highlight.pipe'; +export * from './snippet.pipe'; diff --git a/frontend/src/app/search/highlight.pipe.ts b/frontend/src/app/pipes/regex-highlight.pipe.ts similarity index 72% rename from frontend/src/app/search/highlight.pipe.ts rename to frontend/src/app/pipes/regex-highlight.pipe.ts index 3ee84d437..323292ffa 100644 --- a/frontend/src/app/search/highlight.pipe.ts +++ b/frontend/src/app/pipes/regex-highlight.pipe.ts @@ -3,22 +3,20 @@ import { DomSanitizer } from '@angular/platform-browser'; import { HighlightService } from '../services/highlight.service'; @Pipe({ - name: 'highlight' + name: 'regexHighlight' }) -export class HighlightPipe implements PipeTransform { +export class RegexHighlightPipe implements PipeTransform { constructor(private sanitizer: DomSanitizer, private highlightService: HighlightService) { } /** * Transforms a text to highlight all the text matching the specified query. - * - * @param snippets Only show snippets. When this enabled, line breaks will also be replaced with -- */ - transform(text: string, query: string, snippets: boolean = false) { + transform(text: string, query: string) { const highlights = this.highlightService.highlight(text, query); - const parts = snippets ? this.highlightService.snippets(highlights) : Array.from(highlights); + const parts = Array.from(highlights); const highlightedText = parts.map(part => { - const sanitizedText = this.sanitizedLineBreaks(part.substring, snippets ? ' — ' : '
'); + const sanitizedText = this.sanitizedLineBreaks(part.substring, '
'); return part.isHit ? `${sanitizedText}` : sanitizedText; }).join(''); diff --git a/frontend/src/app/pipes/snippet.pipe.ts b/frontend/src/app/pipes/snippet.pipe.ts new file mode 100644 index 000000000..4634395cb --- /dev/null +++ b/frontend/src/app/pipes/snippet.pipe.ts @@ -0,0 +1,20 @@ +import { Pipe, PipeTransform } from '@angular/core'; +import { DomSanitizer } from '@angular/platform-browser'; +@Pipe({ + name: 'snippet' +}) +export class SnippetPipe implements PipeTransform { + constructor(private sanitizer: DomSanitizer) { + } + + /** + * Transforms a text to only show its leading characters with an ellipsis + * + * @param nCharacters Specifies how many leading characters should be displayed + */ + transform(text: string, nCharacters=100) { + const snippedText = text.slice(0, nCharacters).concat('...'); + return this.sanitizer.bypassSecurityTrustHtml(snippedText); + } + +} diff --git a/frontend/src/app/search/index.ts b/frontend/src/app/search/index.ts index 11e8bc3bf..40905695a 100644 --- a/frontend/src/app/search/index.ts +++ b/frontend/src/app/search/index.ts @@ -1,4 +1,3 @@ -export * from './highlight.pipe'; export * from './search.component'; export * from './search-relevance.component'; export * from './search-results.component'; diff --git a/frontend/src/app/services/highlight.service.ts b/frontend/src/app/services/highlight.service.ts index 4fdc2a4ea..3ee28b96c 100644 --- a/frontend/src/app/services/highlight.service.ts +++ b/frontend/src/app/services/highlight.service.ts @@ -5,10 +5,7 @@ import { Injectable } from '@angular/core'; * a more scalable approach would need to be implemented if rendering many hits is required. */ const maxHits = 100; -/** - * The maximum number of snippets. - */ -const maxSnippetsCount = 7; + /** * The maximum character length of all the text snippets combined. */ @@ -78,39 +75,6 @@ export class HighlightService { } } - /** - * Gets short snippets from the text part to give the user a short overview of the text content. - */ - public snippets(parts: IterableIterator): TextPart[] { - const snippets: TextPart[] = []; - for ( - let i = 0, next = parts.next(); - !next.done && i < maxSnippetsCount; - i++, next = parts.next() - ) { - snippets.push(next.value); - } - - const lengths = this.getSnippetLengths( - snippets.map((snippet) => snippet.substring.length), - maxSnippetsLength - ); - - snippets.forEach((part, index) => { - part.substring = this.cropSnippetText( - part.substring, - lengths[index], - index === snippets.length - 1 - ? 'left' - : index === 0 - ? 'right' - : 'middle' - ); - }); - - return snippets; - } - /** * Convert the query to a regular expression matching any hit in a string. * @@ -151,79 +115,6 @@ export class HighlightService { ); } - private getSnippetLengths( - actualLengths: number[], - maxTotalLength: number, - croppedSnippets = actualLengths.length - ): number[] { - const targetLengths: number[] = []; - let remainingCharacters = maxTotalLength; - const maxLength = Math.max( - 1, - Math.floor(maxTotalLength / croppedSnippets) - ); - - let remainingSnippets = 0; - - let i = 0; - for (; i < actualLengths.length && remainingCharacters > 0; i++) { - const actualLength = actualLengths[i]; - const targetLength = Math.min(actualLength, maxLength); - - remainingCharacters -= targetLength; - targetLengths[i] = targetLength; - - if (actualLength > targetLength) { - // only the cropped snippets could become longer - remainingSnippets++; - } - } - for (; i < actualLengths.length; i++) { - targetLengths[i] = 0; - } - - if (remainingCharacters && remainingSnippets) { - // if a snippet is shorter than the maximum snippet length, allow the remaining snippets to become longer - const additionalLengths = this.getSnippetLengths( - actualLengths.map( - (length, index) => length - targetLengths[index] - ), - remainingCharacters, - remainingSnippets - ); - return targetLengths.map( - (length, index) => length + additionalLengths[index] - ); - } - - return targetLengths; - } - - private cropSnippetText( - text: string, - maxLength: number, - location: 'left' | 'middle' | 'right' - ): string { - if (text.length <= maxLength) { - return text; - } - - switch (location) { - case 'left': - return text.substr(0, maxLength) + omissionString; - - case 'middle': - return ( - text.substr(0, maxLength / 2) + - omissionString + - text.substr(text.length - maxLength / 2) - ); - - case 'right': - return omissionString + text.slice(-maxLength); - } - } - /** * Get the word patterns match in a query. * diff --git a/frontend/src/app/shared/shared.module.ts b/frontend/src/app/shared/shared.module.ts index 8f25fd406..b19f89b05 100644 --- a/frontend/src/app/shared/shared.module.ts +++ b/frontend/src/app/shared/shared.module.ts @@ -12,7 +12,6 @@ import { BalloonDirective } from '../balloon.directive'; import { DatePickerComponent } from '../corpus-selection/corpus-filter/date-picker/date-picker.component'; import { ErrorComponent } from '../error/error.component'; import { ScrollToDirective } from '../scroll-to.directive'; -import { HighlightPipe } from '../search'; import { DropdownModule } from './dropdown/dropdown.module'; import { TabPanelDirective } from './tabs/tab-panel.directive'; import { TabsComponent } from './tabs/tabs.component'; @@ -22,7 +21,6 @@ import { TabsComponent } from './tabs/tabs.component'; DatePickerComponent, ErrorComponent, BalloonDirective, - HighlightPipe, ScrollToDirective, TabsComponent, TabPanelDirective, @@ -43,7 +41,6 @@ import { TabsComponent } from './tabs/tabs.component'; FormsModule, FontAwesomeModule, BalloonDirective, - HighlightPipe, HttpClientModule, HttpClientXsrfModule, RouterModule, From cb1f833340c3ff22b4b379e129fbd93403e432f6 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 18 Apr 2024 15:23:42 +0200 Subject: [PATCH 06/30] revert show entities on document previews --- .../document-preview/document-preview.component.html | 10 ++++------ .../document-preview/document-preview.component.scss | 3 --- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/frontend/src/app/document/document-preview/document-preview.component.html b/frontend/src/app/document/document-preview/document-preview.component.html index ed4e8a2bc..87429833f 100644 --- a/frontend/src/app/document/document-preview/document-preview.component.html +++ b/frontend/src/app/document/document-preview/document-preview.component.html @@ -1,7 +1,4 @@ -
- Relevance: -
Your tags @@ -21,9 +21,9 @@ + [innerHtml]="field | elasticsearchHighlight:document"> {{displayGeoPointField(field)}}{{field | geoData:document}} {{document.fieldValue(field)}} + [innerHtml]="document.fieldValue(field) | snippet">
@@ -18,7 +15,7 @@ @@ -27,9 +24,10 @@
-
From 1deb9e640a790316d345ed83998c047b8deb4d68 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 25 Apr 2024 13:49:31 +0200 Subject: [PATCH 09/30] feat: working display of named entities --- .../app/document-view/document-view.component.html | 2 +- frontend/src/app/models/found-document.ts | 2 +- frontend/src/app/pipes/entity.pipe.ts | 12 +++++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/frontend/src/app/document-view/document-view.component.html b/frontend/src/app/document-view/document-view.component.html index 3730d2adf..9314fe57a 100644 --- a/frontend/src/app/document-view/document-view.component.html +++ b/frontend/src/app/document-view/document-view.component.html @@ -40,7 +40,7 @@
+ [innerHtml]="document.annotations$ | async | entity:document:field.name">
response.annotations) + map( response => response.annotations || []) ); } diff --git a/frontend/src/app/pipes/entity.pipe.ts b/frontend/src/app/pipes/entity.pipe.ts index 92bc99e99..ebb556edf 100644 --- a/frontend/src/app/pipes/entity.pipe.ts +++ b/frontend/src/app/pipes/entity.pipe.ts @@ -1,7 +1,10 @@ import { Pipe, PipeTransform } from '@angular/core'; import { DomSanitizer } from '@angular/platform-browser'; -import { CorpusField, FoundDocument } from '../models'; +import { Observable } from 'rxjs'; + +import { FoundDocument } from '../models'; import * as _ from 'lodash'; + @Pipe({ name: 'entity' }) @@ -14,10 +17,9 @@ export class EntityPipe implements PipeTransform { * * @param document FoundDocument holding the actual data */ - transform(field: CorpusField, document: FoundDocument) { - const newText = document.annotations$.map( - (annotation)=> _.set(document.fieldValues, _.keys(annotation)[0], _.values(annotation)[0])); - return newText; + transform(annotations$: Observable<{[fieldName: string]: string}[]>, document: FoundDocument, fieldName: string) { + const newText = annotations$[fieldName]; + return newText || document.fieldValues[fieldName]; } } From e08b20e76f1a71cd4771d76d7bc26ad2870fc2f1 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 25 Apr 2024 14:07:05 +0200 Subject: [PATCH 10/30] fix: update backend parsing of annotations --- backend/es/tests/test_named_entity_search.py | 5 ++--- backend/es/views.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/es/tests/test_named_entity_search.py b/backend/es/tests/test_named_entity_search.py index bb12328c6..81afc8460 100644 --- a/backend/es/tests/test_named_entity_search.py +++ b/backend/es/tests/test_named_entity_search.py @@ -3,9 +3,8 @@ def test_ner_search_view(es_ner_search_client, client, times_user): client.force_login(times_user) - route = '/api/es/times/named_entities' - data = {'id': 'my_identifier'} - response = client.post(route, data, content_type='application/json') + route = '/api/es/times/my_identifier/named_entities' + response = client.get(route, content_type='application/json') assert response.status_code == 200 diff --git a/backend/es/views.py b/backend/es/views.py index ceabd6dc3..645fe86e6 100644 --- a/backend/es/views.py +++ b/backend/es/views.py @@ -168,7 +168,7 @@ def add_terms(self, fields: list[str]) -> list[dict]: def find_entities(self, input_text: str, entity_classes: set) -> str: # regex pattern to match annotations of format "[Wally](Person)" and split it into two groups - pattern = re.compile('(\[[a-zA-Z ]+\])(\([a-zA-Z ]+\))') + pattern = re.compile('(\[[^]]+\])(\([A-Z]+\))') annotations = list(set(pattern.findall(input_text))) for annotation in annotations: input_text = self.substitute_annotation_with_tag( From 1df805054860f0132cc14eb0e668d3a2de0d5b4f Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Fri, 26 Apr 2024 16:43:51 +0200 Subject: [PATCH 11/30] add toggle component --- .../document-popup.component.html | 6 +- .../document-popup.component.ts | 8 ++- .../app/search/search-results.component.html | 1 + frontend/src/app/shared/shared.module.ts | 3 + .../app/shared/toggle/toggle.component.html | 4 ++ .../app/shared/toggle/toggle.component.scss | 68 +++++++++++++++++++ .../shared/toggle/toggle.component.spec.ts | 25 +++++++ .../src/app/shared/toggle/toggle.component.ts | 22 ++++++ 8 files changed, 134 insertions(+), 3 deletions(-) create mode 100644 frontend/src/app/shared/toggle/toggle.component.html create mode 100644 frontend/src/app/shared/toggle/toggle.component.scss create mode 100644 frontend/src/app/shared/toggle/toggle.component.spec.ts create mode 100644 frontend/src/app/shared/toggle/toggle.component.ts diff --git a/frontend/src/app/document/document-popup/document-popup.component.html b/frontend/src/app/document/document-popup/document-popup.component.html index 6f379e664..49976a19c 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.html +++ b/frontend/src/app/document/document-popup/document-popup.component.html @@ -1,7 +1,9 @@ + [responsive]="true" [maximizable]="true" [dismissableMask]="true" [draggable]="true" [resizable]="false" [blockScroll]="true"> + + Document {{document.position}} of {{page.total}} + diff --git a/frontend/src/app/document/document-popup/document-popup.component.ts b/frontend/src/app/document/document-popup/document-popup.component.ts index 98109ffa5..1a99c58d5 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.ts +++ b/frontend/src/app/document/document-popup/document-popup.component.ts @@ -4,7 +4,7 @@ import { takeUntil } from 'rxjs/operators'; import * as _ from 'lodash'; import { FoundDocument, QueryModel } from '../../models'; import { Subject } from 'rxjs'; -import { documentIcons, actionIcons } from '../../shared/icons'; +import { documentIcons, actionIcons, corpusIcons } from '../../shared/icons'; @Component({ selector: 'ia-document-popup', @@ -23,6 +23,8 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { actionIcons = actionIcons; documentIcons = documentIcons; + showNamedEntities = false; + private refresh$ = new Subject(); get documentPageLink(): string[] { @@ -63,4 +65,8 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { this.visible = false; } } + + toggleNER(): void { + this.showNamedEntities = !this.showNamedEntities; + } } diff --git a/frontend/src/app/search/search-results.component.html b/frontend/src/app/search/search-results.component.html index 2d26d15d6..26d6abeb5 100644 --- a/frontend/src/app/search/search-results.component.html +++ b/frontend/src/app/search/search-results.component.html @@ -9,6 +9,7 @@

- {{(pageResults.to$ | async)}}

+

NER

diff --git a/frontend/src/app/shared/shared.module.ts b/frontend/src/app/shared/shared.module.ts index b19f89b05..2f9c77425 100644 --- a/frontend/src/app/shared/shared.module.ts +++ b/frontend/src/app/shared/shared.module.ts @@ -15,6 +15,7 @@ import { ScrollToDirective } from '../scroll-to.directive'; import { DropdownModule } from './dropdown/dropdown.module'; import { TabPanelDirective } from './tabs/tab-panel.directive'; import { TabsComponent } from './tabs/tabs.component'; +import { ToggleComponent } from './toggle/toggle.component'; @NgModule({ declarations: [ @@ -24,6 +25,7 @@ import { TabsComponent } from './tabs/tabs.component'; ScrollToDirective, TabsComponent, TabPanelDirective, + ToggleComponent, ], exports: [ // shared components @@ -45,6 +47,7 @@ import { TabsComponent } from './tabs/tabs.component'; HttpClientXsrfModule, RouterModule, TableModule, + ToggleComponent, ], imports: [ BrowserAnimationsModule, diff --git a/frontend/src/app/shared/toggle/toggle.component.html b/frontend/src/app/shared/toggle/toggle.component.html new file mode 100644 index 000000000..64fafd1de --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.html @@ -0,0 +1,4 @@ +
+ + +
\ No newline at end of file diff --git a/frontend/src/app/shared/toggle/toggle.component.scss b/frontend/src/app/shared/toggle/toggle.component.scss new file mode 100644 index 000000000..7ca30a115 --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.scss @@ -0,0 +1,68 @@ +@import "../../../_utilities"; + +/* The switch - the box around the slider */ +.toggle-container { + position: absolute; + margin-left: .5rem; + margin-top: -.2rem; + display: inline-block; + width: 4rem; + height: 2rem; + pointer-events: none; + + /* Hide default HTML checkbox */ + input { + opacity: 0; + width: 100%; + height: 100%; + pointer-events: all; + } +} + + +/* The slider */ +.slider { + position: absolute; + cursor: pointer; + top: 0; + left: 0; + right: 0; + bottom: 0; + background-color: $highlight-color; + transition: .4s; + border-radius: 35px; + pointer-events: none; + &:before { + content: ""; + z-index: 20; + position: absolute; + height: 1.6rem; + width: 1.6rem; + left: .2rem; + bottom: .2rem; + background-color: white; + transition: .4s; + border-radius: 50%; + } +} + + + +input:checked { + + .slider { + background-color: $primary; + } + + + .slider:before { + transform: translateX(2rem); + -webkit-transform: translateX(2rem); + -moz-transform: translateX(2rem); + -ms-transform: translateX(2rem); + -o-transform: translateX(2rem); + } + + + .slider:after { + left: calc(100% - 5px); + transform: translateX(-100%); + } +} \ No newline at end of file diff --git a/frontend/src/app/shared/toggle/toggle.component.spec.ts b/frontend/src/app/shared/toggle/toggle.component.spec.ts new file mode 100644 index 000000000..301ab008b --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { ToggleComponent } from './toggle.component'; + +describe('ToggleComponent', () => { + let component: ToggleComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [ ToggleComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(ToggleComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/shared/toggle/toggle.component.ts b/frontend/src/app/shared/toggle/toggle.component.ts new file mode 100644 index 000000000..13194c3c8 --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.ts @@ -0,0 +1,22 @@ +import { Component, EventEmitter, OnInit, Output } from '@angular/core'; + +@Component({ + selector: 'ia-toggle', + templateUrl: './toggle.component.html', + styleUrls: ['./toggle.component.scss'] +}) +export class ToggleComponent implements OnInit { + @Output() toggled = new EventEmitter(); + active = false; + + constructor() { } + + ngOnInit(): void { + } + + public toggleButton() { + this.active = !this.active; + this.toggled.emit(this.active); + } + +} From b043b8a7f57d8afb471dc72bc37cc564fd936b7c Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 19 Jun 2024 17:59:54 +0200 Subject: [PATCH 12/30] use toggle in document-popup --- frontend/src/app/document-view/document-view.component.html | 4 ++-- frontend/src/app/document-view/document-view.component.ts | 3 +++ .../app/document/document-popup/document-popup.component.html | 3 ++- .../app/document/document-popup/document-popup.component.ts | 4 ++-- frontend/src/app/search/search-results.component.html | 1 - 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/frontend/src/app/document-view/document-view.component.html b/frontend/src/app/document-view/document-view.component.html index 9314fe57a..ce83d7098 100644 --- a/frontend/src/app/document-view/document-view.component.html +++ b/frontend/src/app/document-view/document-view.component.html @@ -38,7 +38,7 @@
-
@@ -48,7 +48,7 @@ [innerHtml]="field | elasticsearchHighlight:document">
- +
diff --git a/frontend/src/app/document-view/document-view.component.ts b/frontend/src/app/document-view/document-view.component.ts index a1fd4f8c2..438caa8f1 100644 --- a/frontend/src/app/document-view/document-view.component.ts +++ b/frontend/src/app/document-view/document-view.component.ts @@ -24,6 +24,9 @@ export class DocumentViewComponent implements OnChanges { @Input() public view: DocumentView; + @Input() + public showEntities: boolean; + documentIcons = documentIcons; /** active tab on opening */ diff --git a/frontend/src/app/document/document-popup/document-popup.component.html b/frontend/src/app/document/document-popup/document-popup.component.html index 49976a19c..501c025f5 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.html +++ b/frontend/src/app/document/document-popup/document-popup.component.html @@ -3,9 +3,10 @@ [responsive]="true" [maximizable]="true" [dismissableMask]="true" [draggable]="true" [resizable]="false" [blockScroll]="true"> Document {{document.position}} of {{page.total}} + NER - +
diff --git a/frontend/src/app/document/document-popup/document-popup.component.ts b/frontend/src/app/document/document-popup/document-popup.component.ts index 1a99c58d5..7ff4b6351 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.ts +++ b/frontend/src/app/document/document-popup/document-popup.component.ts @@ -66,7 +66,7 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { } } - toggleNER(): void { - this.showNamedEntities = !this.showNamedEntities; + toggleNER(active: boolean): void { + this.showNamedEntities = active; } } diff --git a/frontend/src/app/search/search-results.component.html b/frontend/src/app/search/search-results.component.html index 26d6abeb5..2d26d15d6 100644 --- a/frontend/src/app/search/search-results.component.html +++ b/frontend/src/app/search/search-results.component.html @@ -9,7 +9,6 @@

- {{(pageResults.to$ | async)}}

-

NER

From c0133955725f92b63c12e72896164bdff9de0571 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 19 Jun 2024 18:00:18 +0200 Subject: [PATCH 13/30] simplify unpacking named_entities response --- frontend/src/app/models/found-document.ts | 10 ++++------ frontend/src/app/services/entity.service.ts | 4 +--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/frontend/src/app/models/found-document.ts b/frontend/src/app/models/found-document.ts index 744bddc01..ddbc45675 100644 --- a/frontend/src/app/models/found-document.ts +++ b/frontend/src/app/models/found-document.ts @@ -126,13 +126,11 @@ export class FoundDocument { private fetchAnnotatedEntities(): Observable<{[fieldName: string]: string}[]> { const response$ = this.entityService.getDocumentEntities(this.corpus, this.id); - response$.pipe( - map( response => response.entities ) - ).toPromise().then((entities) => { - this.entities = entities; - }); return response$.pipe( - map( response => response.annotations || []) + map( response => { + this.entities = response.entities; + return response.annotations || []; + }) ); } diff --git a/frontend/src/app/services/entity.service.ts b/frontend/src/app/services/entity.service.ts index 5d8a35f0a..d740438a7 100644 --- a/frontend/src/app/services/entity.service.ts +++ b/frontend/src/app/services/entity.service.ts @@ -15,8 +15,6 @@ export class EntityService { public getDocumentEntities(corpus: Corpus, id: string): Observable { const url = `/api/es/${corpus.name}/${id}/named_entities`; - return this.http.get(url).pipe( - take(1), - ); + return this.http.get(url); } } From 666d9d364f55423dc4d30d3fbba8092751538107 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 20 Jun 2024 17:35:23 +0200 Subject: [PATCH 14/30] adjust colour palette --- frontend/src/_utilities.scss | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/src/_utilities.scss b/frontend/src/_utilities.scss index 3a77ecb6e..6de7fcc9f 100644 --- a/frontend/src/_utilities.scss +++ b/frontend/src/_utilities.scss @@ -30,10 +30,10 @@ $section-padding: 3rem 1.5rem; $boxShadow: 0 2px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px rgba(10, 10, 10, 0.1); $boxShadowHover: 0px 5px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px $primary; -$entity-person: #88CCEE; -$entity-location:#44AA99; -$entity-organization: #DDCC77; -$entity-miscellaneous: #AA4499; +$entity-person: #ADDBE0; +$entity-location: #ADF489; +$entity-organization: #E6D5A8; +$entity-miscellaneous: #EF9AB4; @import "bulma/sass/utilities/_all"; From 22fcda6f0ef4057d42532a4d46eb7ac52793ea70 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 20 Jun 2024 18:45:45 +0200 Subject: [PATCH 15/30] fix: remove snippets test for highlight service --- .../app/services/highlight.service.spec.ts | 37 ------------------- .../src/app/services/highlight.service.ts | 1 - 2 files changed, 38 deletions(-) diff --git a/frontend/src/app/services/highlight.service.spec.ts b/frontend/src/app/services/highlight.service.spec.ts index 2eb8aa049..0f50f0cfb 100644 --- a/frontend/src/app/services/highlight.service.spec.ts +++ b/frontend/src/app/services/highlight.service.spec.ts @@ -90,43 +90,6 @@ describe('HighlightService', () => { [13, 'في']]); }); - it('Should limit the length of hits using snippets', () => { - const text = generateSequence(0, 10000); - const remainingLength = (maxSnippetsLength - 4) * 0.5; - const leftLength = Math.ceil(remainingLength); - const rightLength = Math.floor(remainingLength); - const sequenceSnippetsLength = Math.ceil(leftLength / 5); - - const highlights = highlightService.highlight(text, '5000'); - const snippets = highlightService.snippets(highlights); - - const result = getHighlightedString(snippets); - const expected = getHighlightedString([ - { - substring: omissionString + generateSequence(5000 - sequenceSnippetsLength, 5000).slice(-leftLength + 1) + ' ', - isHit: false - }, - { - substring: '5000', - isHit: true - }, - { - substring: ' ' + generateSequence(5001, 5001 + sequenceSnippetsLength).substr(0, rightLength - 1) + omissionString, - isHit: false - }]); - - expect(result).toEqual(expected); - }); - - it('Should pass short snippets', () => { - const highlights = highlightService.highlight('hello world!', ''); - const snippets = highlightService.snippets(highlights); - expect(snippets).toEqual([{ - isHit: false, - substring: 'hello world!' - }]); - }); - it('Should highlight multiline text', () => { expectHighlights( // eslint-disable-next-line max-len diff --git a/frontend/src/app/services/highlight.service.ts b/frontend/src/app/services/highlight.service.ts index 3ee28b96c..e4bc31941 100644 --- a/frontend/src/app/services/highlight.service.ts +++ b/frontend/src/app/services/highlight.service.ts @@ -49,7 +49,6 @@ export class HighlightService { } let result: RegExpExecArray; - const parsedText: TextPart[] = []; let lastIndex = 0; for ( From f9c117a0cd6ca5483156db2be6a3263db6a20bc4 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 20 Jun 2024 22:34:22 +0200 Subject: [PATCH 16/30] fix backend tests --- backend/es/conftest.py | 24 ++++++-------------- backend/es/tests/test_named_entity_search.py | 7 +++--- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/backend/es/conftest.py b/backend/es/conftest.py index 46a5da917..4545a2801 100644 --- a/backend/es/conftest.py +++ b/backend/es/conftest.py @@ -1,4 +1,6 @@ import pytest +from time import sleep + from django.contrib.auth.models import Group from addcorpus.python_corpora.load_corpus import load_corpus_definition @@ -17,21 +19,18 @@ def corpus_definition(mock_corpus): yield corpus -@pytest.fixture(scope='module') -def es_ner_search_client(es_client, mock_corpus): +@pytest.fixture() +def es_ner_search_client(es_client, basic_mock_corpus, basic_corpus_public, index_basic_mock_corpus): """ Create and populate an index for the mock corpus in elasticsearch. Returns an elastic search client for the mock corpus. """ - # add data from mock corpus - corpus = load_corpus_definition(mock_corpus) - es_index.create(es_client, corpus, False, True, False) - es_index.populate(es_client, mock_corpus, corpus) - es_client.indices.put_mapping(index=corpus.es_index, properties={ + corpus = Corpus.objects.get(name=basic_mock_corpus) + es_client.indices.put_mapping(index=corpus.configuration.es_index, properties={ "content_ner": {"type": "annotated_text"}}) - es_client.index(index=corpus.es_index, document={ + es_client.index(index=corpus.configuration.es_index, document={ 'id': 'my_identifier', 'content': 'Guybrush Threepwood is looking for treasure on Monkey Island', 'content_ner': '[Guybrush Threepwood](PER) is looking for treasure on [Monkey Island](LOC)'}) @@ -39,16 +38,7 @@ def es_ner_search_client(es_client, mock_corpus): # ES is "near real time", so give it a second before we start searching the index sleep(1) yield es_client - # delete index when done - es_client.indices.delete(index='times-test') - -@pytest.fixture() -def basic_corpus_index(es_client, basic_corpus): - corpus = load_corpus_definition(basic_corpus) - es_index.create(es_client, corpus, False, True, False) - yield es_client - es_client.indices.delete(index=corpus.es_index) @pytest.fixture() def es_index_client(es_client, mock_corpus): diff --git a/backend/es/tests/test_named_entity_search.py b/backend/es/tests/test_named_entity_search.py index 81afc8460..a6dde20dc 100644 --- a/backend/es/tests/test_named_entity_search.py +++ b/backend/es/tests/test_named_entity_search.py @@ -1,9 +1,8 @@ from es.views import NamedEntitySearchView -def test_ner_search_view(es_ner_search_client, client, times_user): - client.force_login(times_user) - route = '/api/es/times/my_identifier/named_entities' +def test_ner_search_view(es_ner_search_client, client): + route = '/api/es/mock-csv-corpus/my_identifier/named_entities' response = client.get(route, content_type='application/json') assert response.status_code == 200 @@ -34,7 +33,7 @@ def test_construct_ner_query(): def test_find_named_entity_fields(es_ner_search_client): viewset = NamedEntitySearchView() fields = viewset.find_named_entity_fields( - es_ner_search_client, 'times-test') + es_ner_search_client, 'test-basic-corpus') assert len(fields) == 1 assert fields[0] == 'content_ner' From 9b1bf19fd16e2560ba029ad0b3c763f22e4f98a5 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Fri, 21 Jun 2024 11:52:49 +0200 Subject: [PATCH 17/30] fix frontend tests: remove circular imports --- frontend/src/app/common-test-bed.ts | 9 ++++++++- frontend/src/app/models/found-document.spec.ts | 15 ++++++++------- .../app/services/elastic-search.service.spec.ts | 8 ++++++-- frontend/src/app/services/entity.service.ts | 1 - frontend/src/mock-data/constructor-helpers.ts | 6 +++--- frontend/src/mock-data/elastic-search.ts | 2 +- frontend/src/mock-data/entity.ts | 11 +++++++++++ 7 files changed, 37 insertions(+), 15 deletions(-) create mode 100644 frontend/src/mock-data/entity.ts diff --git a/frontend/src/app/common-test-bed.ts b/frontend/src/app/common-test-bed.ts index 25de1b887..eecd514e4 100644 --- a/frontend/src/app/common-test-bed.ts +++ b/frontend/src/app/common-test-bed.ts @@ -10,9 +10,12 @@ import { AuthServiceMock } from '../mock-data/auth'; import { CorpusServiceMock } from '../mock-data/corpus'; import { DialogServiceMock } from '../mock-data/dialog'; import { ElasticSearchServiceMock } from '../mock-data/elastic-search'; +import { EntityServiceMock } from '../mock-data/entity'; import { MockCorpusResponse } from '../mock-data/corpus-response'; import { SearchServiceMock } from '../mock-data/search'; -import { ApiService, AuthService, CorpusService, DialogService, ElasticSearchService, SearchService } from './services'; +import { ApiService, AuthService, CorpusService, DialogService, SearchService } from './services'; +import { ElasticSearchService } from './services/elastic-search.service'; +import { EntityService } from './services/entity.service'; import { WordmodelsService } from './services/wordmodels.service'; import { WordmodelsServiceMock } from '../mock-data/wordmodels'; import { VisualizationService } from './services/visualization.service'; @@ -49,6 +52,10 @@ export const commonTestBed = () => { provide: ElasticSearchService, useValue: new ElasticSearchServiceMock(), }, + { + provide: EntityService, + useValue: new EntityServiceMock(), + }, { provide: ElementRef, useClass: MockElementRef, diff --git a/frontend/src/app/models/found-document.spec.ts b/frontend/src/app/models/found-document.spec.ts index 5c467de18..2c2a21cd6 100644 --- a/frontend/src/app/models/found-document.spec.ts +++ b/frontend/src/app/models/found-document.spec.ts @@ -4,12 +4,13 @@ import { reduce, take } from 'rxjs/operators'; import { Observable } from 'rxjs'; import { makeDocument } from '../../mock-data/constructor-helpers'; import { mockCorpus, mockCorpus3 } from '../../mock-data/corpus'; +import { EntityServiceMock } from '../../mock-data/entity'; +import { TagServiceMock, mockTags } from '../../mock-data/tag'; import { FoundDocument } from './found-document'; +import { EntityService } from '../services/entity.service'; import { TagService } from '../services/tag.service'; -import { TagServiceMock, mockTags } from '../../mock-data/tag'; import { Tag } from './tag'; -import { ElasticSearchService } from '../services'; -import { ElasticSearchServiceMock } from '../../mock-data/elastic-search'; + const maxScore = 2.9113607; const mockResponse = { @@ -36,19 +37,19 @@ const mockResponse = { fdescribe('FoundDocument', () => { const mockTagService = new TagServiceMock() as any; - const mockElasticService = new ElasticSearchServiceMock() as any; + const mockEntityService = new EntityServiceMock() as any; beforeEach(() => { TestBed.configureTestingModule({ providers: [ { provide: TagService, useClass: TagServiceMock }, - { provide: ElasticSearchService, useClass: ElasticSearchServiceMock } + { provide: EntityService, useClass: EntityServiceMock } ] }); }); fit('should construct from an elasticsearch response', () => { - const document = new FoundDocument(mockTagService, mockElasticService, mockCorpus, mockResponse, maxScore); + const document = new FoundDocument(mockTagService, mockEntityService, mockCorpus, mockResponse, maxScore); expect(document.id).toBe('1994_troonrede'); expect(document.fieldValues['monarch']).toBe('Beatrix'); @@ -110,7 +111,7 @@ fdescribe('FoundDocument', () => { ] } }; - const document = new FoundDocument(mockTagService, mockElasticService, mockCorpus, searchResponse, maxScore); + const document = new FoundDocument(mockTagService, mockEntityService, mockCorpus, searchResponse, maxScore); expect(document.fieldValues['content']).toEqual( 'Wally was last seen in Paris'); })); diff --git a/frontend/src/app/services/elastic-search.service.spec.ts b/frontend/src/app/services/elastic-search.service.spec.ts index 53841cc52..e1ace87e8 100644 --- a/frontend/src/app/services/elastic-search.service.spec.ts +++ b/frontend/src/app/services/elastic-search.service.spec.ts @@ -3,9 +3,12 @@ import { HttpClientTestingModule, HttpTestingController } from '@angular/common/ import { ElasticSearchService, SearchResponse } from './elastic-search.service'; import { QueryModel } from '../models'; import { mockCorpus, mockField, mockField2 } from '../../mock-data/corpus'; -import { TagService } from './tag.service'; +import { EntityService } from './entity.service'; +import { EntityServiceMock } from '../../mock-data/entity'; import { TagServiceMock } from '../../mock-data/tag'; -import { Aggregator, TermsAggregator } from '../models/aggregation'; +import { TagService } from './tag.service'; +import { TermsAggregator } from '../models/aggregation'; + const mockResponse: SearchResponse = { took: 4, @@ -65,6 +68,7 @@ describe('ElasticSearchService', () => { TestBed.configureTestingModule({ providers: [ ElasticSearchService, + { provide: EntityService, useValue: new EntityServiceMock()}, { provide: TagService, useValue: new TagServiceMock() } ], imports: [ HttpClientTestingModule ] diff --git a/frontend/src/app/services/entity.service.ts b/frontend/src/app/services/entity.service.ts index d740438a7..056e1e543 100644 --- a/frontend/src/app/services/entity.service.ts +++ b/frontend/src/app/services/entity.service.ts @@ -3,7 +3,6 @@ import { Injectable } from '@angular/core'; import { Observable } from 'rxjs'; import { Corpus, NamedEntitiesResult } from '../models'; -import { map, share, shareReplay, take } from 'rxjs/operators'; @Injectable({ providedIn: 'root', diff --git a/frontend/src/mock-data/constructor-helpers.ts b/frontend/src/mock-data/constructor-helpers.ts index 24c17c218..b369eb305 100644 --- a/frontend/src/mock-data/constructor-helpers.ts +++ b/frontend/src/mock-data/constructor-helpers.ts @@ -3,10 +3,10 @@ import { Corpus, FieldValues, FoundDocument, HighlightResult, SearchHit } from '../app/models'; import { mockCorpus } from './corpus'; import { TagServiceMock } from './tag'; -import { ElasticSearchServiceMock } from './elastic-search'; +import { EntityServiceMock } from './entity'; const tagService = new TagServiceMock() as any; -const elasticSearchService = new ElasticSearchServiceMock() as any; +const entityService = new EntityServiceMock() as any; export const makeDocument = ( fieldValues: FieldValues, @@ -18,6 +18,6 @@ export const makeDocument = ( const hit: SearchHit = { _id: id, _score: relevance, _source: fieldValues, highlight }; - return new FoundDocument(tagService, elasticSearchService, corpus, hit); + return new FoundDocument(tagService, entityService, corpus, hit); }; diff --git a/frontend/src/mock-data/elastic-search.ts b/frontend/src/mock-data/elastic-search.ts index d9a542f09..c584514ad 100644 --- a/frontend/src/mock-data/elastic-search.ts +++ b/frontend/src/mock-data/elastic-search.ts @@ -1,4 +1,4 @@ -import { FoundDocument, NamedEntitiesResult, SearchResults } from '../app/models'; +import { FoundDocument, SearchResults } from '../app/models'; import { makeDocument } from './constructor-helpers'; export class ElasticSearchServiceMock { diff --git a/frontend/src/mock-data/entity.ts b/frontend/src/mock-data/entity.ts new file mode 100644 index 000000000..7f6760916 --- /dev/null +++ b/frontend/src/mock-data/entity.ts @@ -0,0 +1,11 @@ +import { of, Observable } from 'rxjs'; + +import { Corpus, NamedEntitiesResult } from '../app/models'; + +export class EntityServiceMock { + + public getDocumentEntities(corpus: Corpus, id: string): Observable { + return of({annotations: [], entities: []}) + + } +} From 9b9785eceb41d6b25f43655974a2602aab3ff41e Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Fri, 21 Jun 2024 21:23:19 +0200 Subject: [PATCH 18/30] fix: move frontend unit test --- .../document-view.component.spec.ts | 21 +++++++++++------- .../src/app/models/found-document.spec.ts | 22 +------------------ frontend/src/mock-data/entity.ts | 3 +-- 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/frontend/src/app/document-view/document-view.component.spec.ts b/frontend/src/app/document-view/document-view.component.spec.ts index f59858f51..d669f30af 100644 --- a/frontend/src/app/document-view/document-view.component.spec.ts +++ b/frontend/src/app/document-view/document-view.component.spec.ts @@ -1,4 +1,4 @@ -import { ComponentFixture, TestBed, waitForAsync } from '@angular/core/testing'; +import { ComponentFixture, TestBed, fakeAsync, tick, waitForAsync } from '@angular/core/testing'; import { By } from '@angular/platform-browser'; import * as _ from 'lodash'; import { mockCorpus, mockField } from '../../mock-data/corpus'; @@ -7,6 +7,7 @@ import { commonTestBed } from '../common-test-bed'; import { DocumentViewComponent } from './document-view.component'; import { makeDocument } from '../../mock-data/constructor-helpers'; +import { of } from 'rxjs'; describe('DocumentViewComponent', () => { let component: DocumentViewComponent; @@ -20,10 +21,9 @@ describe('DocumentViewComponent', () => { fixture = TestBed.createComponent(DocumentViewComponent); component = fixture.componentInstance; component.corpus = _.merge({ - scanImageType: 'farout_image_type', - fields: [mockField] + scanImageType: 'farout_image_type' }, mockCorpus); - component.document = makeDocument({ great_field: 'Hello world!' }); + component.document = makeDocument({ great_field: 'Hello world!', speech: 'Wally was last seen in Paris' }); fixture.detectChanges(); }); @@ -31,11 +31,8 @@ describe('DocumentViewComponent', () => { expect(component).toBeTruthy(); }); - it('should render fields', async () => { - await fixture.whenStable(); - + it('should render fields', () => { expect(component.propertyFields).toEqual([mockField]); - const debug = fixture.debugElement.queryAll(By.css('[data-test-field-value]')); expect(debug.length).toEqual(1); // number of fields const element = debug[0].nativeElement; @@ -48,4 +45,12 @@ describe('DocumentViewComponent', () => { expect(debug[0].attributes['id']).toBe('tab-speech'); expect(debug[1].attributes['id']).toBe('tab-scan'); }); + + it('shows a named entity legend if showEntities is true', () => { + expect(fixture.debugElement.query(By.css('ia-entity-legend'))).toBeFalsy(); + component.showEntities = true; + fixture.detectChanges(); + expect(fixture.debugElement.query(By.css('ia-entity-legend'))).toBeTruthy(); + }); + }); diff --git a/frontend/src/app/models/found-document.spec.ts b/frontend/src/app/models/found-document.spec.ts index 2c2a21cd6..b17fe8d38 100644 --- a/frontend/src/app/models/found-document.spec.ts +++ b/frontend/src/app/models/found-document.spec.ts @@ -48,7 +48,7 @@ fdescribe('FoundDocument', () => { }); }); - fit('should construct from an elasticsearch response', () => { + it('should construct from an elasticsearch response', () => { const document = new FoundDocument(mockTagService, mockEntityService, mockCorpus, mockResponse, maxScore); expect(document.id).toBe('1994_troonrede'); @@ -95,24 +95,4 @@ fdescribe('FoundDocument', () => { }); })); - it('should fetch and display named entities', fakeAsync(() => { - const searchResponse = { - _index: 'test_index', - _id: 'my_identifier', - _score: 2.9113607, - _source: { - date: '1994-09-20', - id: 'my_identifier', - content: 'Wally was last seen in Paris.' - }, - highlight: { - content: [ - 'seen' - ] - } - }; - const document = new FoundDocument(mockTagService, mockEntityService, mockCorpus, searchResponse, maxScore); - expect(document.fieldValues['content']).toEqual( - 'Wally was last seen in Paris'); - })); }); diff --git a/frontend/src/mock-data/entity.ts b/frontend/src/mock-data/entity.ts index 7f6760916..3fefbfc25 100644 --- a/frontend/src/mock-data/entity.ts +++ b/frontend/src/mock-data/entity.ts @@ -5,7 +5,6 @@ import { Corpus, NamedEntitiesResult } from '../app/models'; export class EntityServiceMock { public getDocumentEntities(corpus: Corpus, id: string): Observable { - return of({annotations: [], entities: []}) - + return of({annotations: [{'content': 'Wally was last seen in Paris'}], entities: ['location', 'person']}) } } From 14756c79621210c3f3454b6586641c2cf3944469 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Mon, 24 Jun 2024 13:37:49 +0200 Subject: [PATCH 19/30] add flag at start of highlight to help colorblind users --- .../document/entity-legend/entity-legend.component.scss | 1 - frontend/src/styles.scss | 8 ++++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.scss b/frontend/src/app/document/entity-legend/entity-legend.component.scss index e269d7265..7f7c6ef4e 100644 --- a/frontend/src/app/document/entity-legend/entity-legend.component.scss +++ b/frontend/src/app/document/entity-legend/entity-legend.component.scss @@ -2,6 +2,5 @@ span.dot { height: .7rem; width: .7rem; margin-right: .4rem; - border-radius: 50%; display: inline-block; } \ No newline at end of file diff --git a/frontend/src/styles.scss b/frontend/src/styles.scss index a07c33961..3b13bf5f7 100644 --- a/frontend/src/styles.scss +++ b/frontend/src/styles.scss @@ -72,15 +72,23 @@ a.dropdown-item[disabled] { } } +span[class^='entity'] { + padding-left: .4em; +} + .entity-per { background-color: $entity-person; + clip-path: polygon(100% 0%, 100% 100%, 0% 100%, 5px 50%, 0% 0%); } .entity-loc { background-color: $entity-location; + clip-path: polygon(5px 0%, 100% 0%, 100% 100%, 5px 100%, 0% 50%); } .entity-org { background-color: $entity-organization; + clip-path: polygon(0 0, 100% 0%, 100% 100%, 5px 100%); } .entity-mis { background-color: $entity-miscellaneous; + clip-path: polygon(5px 0, 100% 0%, 100% 100%, 0% 100%); } From 16cc9b5758382dd3468c8d0836033d89812c9f7a Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Mon, 24 Jun 2024 15:19:06 +0200 Subject: [PATCH 20/30] add has_named_entity property to en/disable frontend NER display --- backend/addcorpus/models.py | 16 ++++++++++++++++ backend/addcorpus/serializers.py | 2 ++ 2 files changed, 18 insertions(+) diff --git a/backend/addcorpus/models.py b/backend/addcorpus/models.py index 7835ca348..962943b36 100644 --- a/backend/addcorpus/models.py +++ b/backend/addcorpus/models.py @@ -21,6 +21,8 @@ from django.db import models from django.db.models.constraints import UniqueConstraint +from ianalyzer.elasticsearch import elasticsearch + MAX_LENGTH_NAME = 126 MAX_LENGTH_DESCRIPTION = 254 MAX_LENGTH_TITLE = 256 @@ -260,6 +262,20 @@ def clean(self): e ]) + @property + def has_named_entities(self): + client = elasticsearch(self.es_index) + try: + mapping = client.indices.get_mapping( + index=self.es_index) + fields = mapping[self.es_index].get( + 'mappings', {}).get('properties', {}).keys() + if any(field.endswith('_ner') for field in fields): + return True + except: + return False + return False + FIELD_DISPLAY_TYPES = [ ('text_content', 'text content'), diff --git a/backend/addcorpus/serializers.py b/backend/addcorpus/serializers.py index d6fb31db9..b1c3eb6ae 100644 --- a/backend/addcorpus/serializers.py +++ b/backend/addcorpus/serializers.py @@ -70,6 +70,7 @@ class CorpusConfigurationSerializer(serializers.ModelSerializer): languages = serializers.ListField(child=LanguageField()) category = PrettyChoiceField(choices=CATEGORIES) default_sort = NonEmptyJSONField() + has_named_entities = serializers.ReadOnlyField() class Meta: model = CorpusConfiguration @@ -89,6 +90,7 @@ class Meta: 'default_sort', 'language_field', 'fields', + 'has_named_entities', ] From fd017113cd84110b1a90a03f24afe7a6edfd1870 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Mon, 24 Jun 2024 15:24:15 +0200 Subject: [PATCH 21/30] feat: deactivate showing NER option for corpora without entities --- .../app/document/document-popup/document-popup.component.html | 2 +- .../app/document/document-popup/document-popup.component.ts | 4 ++++ frontend/src/app/models/corpus.ts | 1 + frontend/src/app/services/corpus.service.ts | 1 + 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/frontend/src/app/document/document-popup/document-popup.component.html b/frontend/src/app/document/document-popup/document-popup.component.html index 501c025f5..8f84e345d 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.html +++ b/frontend/src/app/document/document-popup/document-popup.component.html @@ -3,7 +3,7 @@ [responsive]="true" [maximizable]="true" [dismissableMask]="true" [draggable]="true" [resizable]="false" [blockScroll]="true"> Document {{document.position}} of {{page.total}} - NER + NER diff --git a/frontend/src/app/document/document-popup/document-popup.component.ts b/frontend/src/app/document/document-popup/document-popup.component.ts index 7ff4b6351..b41997c14 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.ts +++ b/frontend/src/app/document/document-popup/document-popup.component.ts @@ -24,6 +24,7 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { documentIcons = documentIcons; showNamedEntities = false; + showNEROption = false; private refresh$ = new Subject(); @@ -40,6 +41,9 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { } ngOnChanges(changes: SimpleChanges): void { + if (changes.queryModel) { + this.showNEROption = this.queryModel.corpus.hasNamedEntities; + } if (changes.page) { this.refresh$.next(); this.focusUpdate(); diff --git a/frontend/src/app/models/corpus.ts b/frontend/src/app/models/corpus.ts index 4906e2966..4e58d42fe 100644 --- a/frontend/src/app/models/corpus.ts +++ b/frontend/src/app/models/corpus.ts @@ -29,6 +29,7 @@ export class Corpus { public wordModelsPresent: boolean, public languages: string[], public category: string, + public hasNamedEntities: boolean, public documentContext?: DocumentContext, public newHighlight?: boolean, public defaultSort?: SortState, diff --git a/frontend/src/app/services/corpus.service.ts b/frontend/src/app/services/corpus.service.ts index 7092bd591..6c4e7ff6d 100644 --- a/frontend/src/app/services/corpus.service.ts +++ b/frontend/src/app/services/corpus.service.ts @@ -89,6 +89,7 @@ export class CorpusService { data.word_models_present, data.languages, data.category, + data.has_named_entities, this.parseDocumentContext(data.document_context, allFields), data.new_highlight, this.parseDefaultSort(data.default_sort, allFields), From 51043b6c12bd198733c966c2248573cbdf5d66aa Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 26 Jun 2024 10:27:03 +0200 Subject: [PATCH 22/30] add unit test --- .../document-view.component.spec.ts | 3 +-- .../document-popup.component.spec.ts | 26 ++++++++++++++++++- .../src/app/models/found-document.spec.ts | 2 +- frontend/src/mock-data/corpus.ts | 3 +++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/frontend/src/app/document-view/document-view.component.spec.ts b/frontend/src/app/document-view/document-view.component.spec.ts index d669f30af..703550fc7 100644 --- a/frontend/src/app/document-view/document-view.component.spec.ts +++ b/frontend/src/app/document-view/document-view.component.spec.ts @@ -1,4 +1,4 @@ -import { ComponentFixture, TestBed, fakeAsync, tick, waitForAsync } from '@angular/core/testing'; +import { ComponentFixture, TestBed, waitForAsync } from '@angular/core/testing'; import { By } from '@angular/platform-browser'; import * as _ from 'lodash'; import { mockCorpus, mockField } from '../../mock-data/corpus'; @@ -7,7 +7,6 @@ import { commonTestBed } from '../common-test-bed'; import { DocumentViewComponent } from './document-view.component'; import { makeDocument } from '../../mock-data/constructor-helpers'; -import { of } from 'rxjs'; describe('DocumentViewComponent', () => { let component: DocumentViewComponent; diff --git a/frontend/src/app/document/document-popup/document-popup.component.spec.ts b/frontend/src/app/document/document-popup/document-popup.component.spec.ts index 74546db68..b497ceecf 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.spec.ts +++ b/frontend/src/app/document/document-popup/document-popup.component.spec.ts @@ -1,7 +1,14 @@ -import { ComponentFixture, TestBed, waitForAsync } from '@angular/core/testing'; +import { ComponentFixture, TestBed, fakeAsync, tick, waitForAsync } from '@angular/core/testing'; +import { By } from '@angular/platform-browser'; import { DocumentPopupComponent } from './document-popup.component'; import { commonTestBed } from '../../common-test-bed'; +import { makeDocument } from '../../../mock-data/constructor-helpers'; +import { mockCorpus, mockCorpus2, mockField } from '../../../mock-data/corpus'; +import { DocumentPage } from '../../models/document-page'; +import { QueryModel } from '../../models'; +import { query } from '@angular/animations'; + describe('DocumentPopupComponent', () => { let component: DocumentPopupComponent; @@ -14,10 +21,27 @@ describe('DocumentPopupComponent', () => { beforeEach(() => { fixture = TestBed.createComponent(DocumentPopupComponent); component = fixture.componentInstance; + const document = makeDocument({ great_field: 'Hello world!' }); + component.document = document; + component.page = new DocumentPage([document], 1, [mockField]); + component.queryModel = new QueryModel(mockCorpus); fixture.detectChanges(); }); it('should create', () => { expect(component).toBeTruthy(); }); + + it('does not show the NER toggle for corpora without named entities', () => { + expect(fixture.debugElement.query(By.css('ia-toggle'))).toBeFalsy(); + }); + + it('shows the NER toggle for corpora with named entities', () => { + const setModel = component.queryModel; + const queryModel = new QueryModel(mockCorpus2); + component.queryModel = queryModel; + component.ngOnChanges({queryModel: {previousValue: setModel, currentValue: queryModel, firstChange: false, isFirstChange: null}}); + fixture.detectChanges(); + expect(fixture.debugElement.query(By.css('ia-toggle'))).toBeTruthy(); + }); }); diff --git a/frontend/src/app/models/found-document.spec.ts b/frontend/src/app/models/found-document.spec.ts index b17fe8d38..b286fa8ec 100644 --- a/frontend/src/app/models/found-document.spec.ts +++ b/frontend/src/app/models/found-document.spec.ts @@ -35,7 +35,7 @@ const mockResponse = { } }; -fdescribe('FoundDocument', () => { +describe('FoundDocument', () => { const mockTagService = new TagServiceMock() as any; const mockEntityService = new EntityServiceMock() as any; diff --git a/frontend/src/mock-data/corpus.ts b/frontend/src/mock-data/corpus.ts index f1472315d..ae6e1d1eb 100644 --- a/frontend/src/mock-data/corpus.ts +++ b/frontend/src/mock-data/corpus.ts @@ -145,6 +145,7 @@ export const mockCorpus: Corpus = { scanImageType: 'pdf', allowImageDownload: false, wordModelsPresent: false, + hasNamedEntities: false, directDownloadLimit: 500, fields: [mockField, mockField2], languages: ['English'], @@ -163,6 +164,7 @@ export const mockCorpus2 = { scanImageType: 'pdf', allowImageDownload: false, wordModelsPresent: false, + hasNamedEntities: true, directDownloadLimit: 1000, fields: [mockField2], languages: ['English', 'French'], @@ -181,6 +183,7 @@ export const mockCorpus3: Corpus = { scanImageType: 'pdf', allowImageDownload: false, wordModelsPresent: false, + hasNamedEntities: false, directDownloadLimit: 2000, fields: [mockField, mockField2, mockField3, mockFieldDate, mockFieldMultipleChoice], languages: ['English'], From 3dcdd1acac787f1d52032333477d027323da2488 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 26 Jun 2024 14:26:42 +0200 Subject: [PATCH 23/30] fix merge conflicts --- frontend/src/app/document/document.module.ts | 2 +- frontend/src/app/manual/manual.module.ts | 2 +- .../src/app/{ => shared}/pipes/elasticsearch-highlight.pipe.ts | 2 +- frontend/src/app/{ => shared}/pipes/entity.pipe.ts | 2 +- frontend/src/app/{ => shared}/pipes/format-entity-class.pipe.ts | 0 frontend/src/app/{ => shared}/pipes/geo-data.pipe.ts | 2 +- frontend/src/app/{ => shared}/pipes/index.ts | 0 frontend/src/app/{ => shared}/pipes/regex-highlight.pipe.ts | 2 +- frontend/src/app/{ => shared}/pipes/snippet.pipe.ts | 0 9 files changed, 6 insertions(+), 6 deletions(-) rename frontend/src/app/{ => shared}/pipes/elasticsearch-highlight.pipe.ts (96%) rename frontend/src/app/{ => shared}/pipes/entity.pipe.ts (93%) rename frontend/src/app/{ => shared}/pipes/format-entity-class.pipe.ts (100%) rename frontend/src/app/{ => shared}/pipes/geo-data.pipe.ts (92%) rename frontend/src/app/{ => shared}/pipes/index.ts (100%) rename frontend/src/app/{ => shared}/pipes/regex-highlight.pipe.ts (94%) rename frontend/src/app/{ => shared}/pipes/snippet.pipe.ts (100%) diff --git a/frontend/src/app/document/document.module.ts b/frontend/src/app/document/document.module.ts index 1e60ee50a..0d52a5ca0 100644 --- a/frontend/src/app/document/document.module.ts +++ b/frontend/src/app/document/document.module.ts @@ -10,7 +10,7 @@ import { DocumentPopupComponent } from './document-popup/document-popup.componen import { DialogModule } from 'primeng/dialog'; import { DocumentPreviewComponent } from './document-preview/document-preview.component'; import { EntityLegendComponent } from './entity-legend/entity-legend.component'; -import { ElasticsearchHighlightPipe, EntityPipe, FormatEntityClassPipe, GeoDataPipe, SnippetPipe } from '../pipes'; +import { ElasticsearchHighlightPipe, EntityPipe, FormatEntityClassPipe, GeoDataPipe, SnippetPipe } from '../shared/pipes'; @NgModule({ declarations: [ diff --git a/frontend/src/app/manual/manual.module.ts b/frontend/src/app/manual/manual.module.ts index fc13467d9..61404fcbd 100644 --- a/frontend/src/app/manual/manual.module.ts +++ b/frontend/src/app/manual/manual.module.ts @@ -4,7 +4,7 @@ import { ManualNavigationComponent } from './manual-navigation.component'; import { ManualComponent } from './manual.component'; import { AboutComponent } from '../about/about.component'; import { PrivacyComponent } from '../privacy/privacy.component'; -import { RegexHighlightPipe } from '../pipes'; +import { RegexHighlightPipe } from '../shared/pipes'; @NgModule({ diff --git a/frontend/src/app/pipes/elasticsearch-highlight.pipe.ts b/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts similarity index 96% rename from frontend/src/app/pipes/elasticsearch-highlight.pipe.ts rename to frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts index 3a35ae3cb..d7dc31375 100644 --- a/frontend/src/app/pipes/elasticsearch-highlight.pipe.ts +++ b/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts @@ -1,6 +1,6 @@ import { Pipe, PipeTransform } from '@angular/core'; import { DomSanitizer } from '@angular/platform-browser'; -import { CorpusField, FoundDocument } from '../models'; +import { CorpusField, FoundDocument } from '../../models'; import * as _ from 'lodash'; @Pipe({ diff --git a/frontend/src/app/pipes/entity.pipe.ts b/frontend/src/app/shared/pipes/entity.pipe.ts similarity index 93% rename from frontend/src/app/pipes/entity.pipe.ts rename to frontend/src/app/shared/pipes/entity.pipe.ts index ebb556edf..ec515884a 100644 --- a/frontend/src/app/pipes/entity.pipe.ts +++ b/frontend/src/app/shared/pipes/entity.pipe.ts @@ -2,7 +2,7 @@ import { Pipe, PipeTransform } from '@angular/core'; import { DomSanitizer } from '@angular/platform-browser'; import { Observable } from 'rxjs'; -import { FoundDocument } from '../models'; +import { FoundDocument } from '../../models'; import * as _ from 'lodash'; @Pipe({ diff --git a/frontend/src/app/pipes/format-entity-class.pipe.ts b/frontend/src/app/shared/pipes/format-entity-class.pipe.ts similarity index 100% rename from frontend/src/app/pipes/format-entity-class.pipe.ts rename to frontend/src/app/shared/pipes/format-entity-class.pipe.ts diff --git a/frontend/src/app/pipes/geo-data.pipe.ts b/frontend/src/app/shared/pipes/geo-data.pipe.ts similarity index 92% rename from frontend/src/app/pipes/geo-data.pipe.ts rename to frontend/src/app/shared/pipes/geo-data.pipe.ts index cf7b18a23..08d9df047 100644 --- a/frontend/src/app/pipes/geo-data.pipe.ts +++ b/frontend/src/app/shared/pipes/geo-data.pipe.ts @@ -1,6 +1,6 @@ import { Pipe, PipeTransform } from '@angular/core'; import { DomSanitizer } from '@angular/platform-browser'; -import { CorpusField, FoundDocument } from '../models'; +import { CorpusField, FoundDocument } from '../../models'; @Pipe({ name: 'geoData' }) diff --git a/frontend/src/app/pipes/index.ts b/frontend/src/app/shared/pipes/index.ts similarity index 100% rename from frontend/src/app/pipes/index.ts rename to frontend/src/app/shared/pipes/index.ts diff --git a/frontend/src/app/pipes/regex-highlight.pipe.ts b/frontend/src/app/shared/pipes/regex-highlight.pipe.ts similarity index 94% rename from frontend/src/app/pipes/regex-highlight.pipe.ts rename to frontend/src/app/shared/pipes/regex-highlight.pipe.ts index 323292ffa..595499503 100644 --- a/frontend/src/app/pipes/regex-highlight.pipe.ts +++ b/frontend/src/app/shared/pipes/regex-highlight.pipe.ts @@ -1,6 +1,6 @@ import { Pipe, PipeTransform, SecurityContext } from '@angular/core'; import { DomSanitizer } from '@angular/platform-browser'; -import { HighlightService } from '../services/highlight.service'; +import { HighlightService } from '../../services/highlight.service'; @Pipe({ name: 'regexHighlight' diff --git a/frontend/src/app/pipes/snippet.pipe.ts b/frontend/src/app/shared/pipes/snippet.pipe.ts similarity index 100% rename from frontend/src/app/pipes/snippet.pipe.ts rename to frontend/src/app/shared/pipes/snippet.pipe.ts From 6416e1c3c2e316c4320436f150cf2fbad08c57eb Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 27 Jun 2024 10:38:59 +0200 Subject: [PATCH 24/30] replace ner suffix separator with colon --- backend/addcorpus/models.py | 2 +- backend/addcorpus/validation/creation.py | 4 ++-- backend/es/conftest.py | 4 ++-- backend/es/tests/test_named_entity_search.py | 6 +++--- backend/es/views.py | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/addcorpus/models.py b/backend/addcorpus/models.py index 962943b36..84349e205 100644 --- a/backend/addcorpus/models.py +++ b/backend/addcorpus/models.py @@ -270,7 +270,7 @@ def has_named_entities(self): index=self.es_index) fields = mapping[self.es_index].get( 'mappings', {}).get('properties', {}).keys() - if any(field.endswith('_ner') for field in fields): + if any(field.endswith(':ner') for field in fields): return True except: return False diff --git a/backend/addcorpus/validation/creation.py b/backend/addcorpus/validation/creation.py index fc18f7d31..445a272fa 100644 --- a/backend/addcorpus/validation/creation.py +++ b/backend/addcorpus/validation/creation.py @@ -124,9 +124,9 @@ def validate_name_is_not_a_route_parameter(value): def validate_name_has_no_ner_suffix(value): - if value.endswith('_ner'): + if value.endswith(':ner'): raise ValidationError( - f'{value} cannot be used as a field name: the suffix `_ner` is reserved for annotated_text fields' + f'{value} cannot be used as a field name: the suffix `:ner` is reserved for annotated_text fields' ) def mapping_can_be_searched(es_mapping): diff --git a/backend/es/conftest.py b/backend/es/conftest.py index 4545a2801..40d462e05 100644 --- a/backend/es/conftest.py +++ b/backend/es/conftest.py @@ -28,12 +28,12 @@ def es_ner_search_client(es_client, basic_mock_corpus, basic_corpus_public, inde # add data from mock corpus corpus = Corpus.objects.get(name=basic_mock_corpus) es_client.indices.put_mapping(index=corpus.configuration.es_index, properties={ - "content_ner": {"type": "annotated_text"}}) + "content:ner": {"type": "annotated_text"}}) es_client.index(index=corpus.configuration.es_index, document={ 'id': 'my_identifier', 'content': 'Guybrush Threepwood is looking for treasure on Monkey Island', - 'content_ner': '[Guybrush Threepwood](PER) is looking for treasure on [Monkey Island](LOC)'}) + 'content:ner': '[Guybrush Threepwood](PER) is looking for treasure on [Monkey Island](LOC)'}) # ES is "near real time", so give it a second before we start searching the index sleep(1) diff --git a/backend/es/tests/test_named_entity_search.py b/backend/es/tests/test_named_entity_search.py index a6dde20dc..3a58d0cfc 100644 --- a/backend/es/tests/test_named_entity_search.py +++ b/backend/es/tests/test_named_entity_search.py @@ -9,7 +9,7 @@ def test_ner_search_view(es_ner_search_client, client): def test_construct_ner_query(): viewset = NamedEntitySearchView() - fields = ['content_ner'] + fields = ['content:ner'] query = viewset.construct_named_entity_query(fields, 'my_identifier') expected = { "bool": { @@ -21,7 +21,7 @@ def test_construct_ner_query(): }, { "terms": { - "content_ner": ["LOC", "PER", "ORG", "MISC"] + "content:ner": ["LOC", "PER", "ORG", "MISC"] } } ] @@ -35,7 +35,7 @@ def test_find_named_entity_fields(es_ner_search_client): fields = viewset.find_named_entity_fields( es_ner_search_client, 'test-basic-corpus') assert len(fields) == 1 - assert fields[0] == 'content_ner' + assert fields[0] == 'content:ner' def test_find_entities(): diff --git a/backend/es/views.py b/backend/es/views.py index 645fe86e6..1e2036da5 100644 --- a/backend/es/views.py +++ b/backend/es/views.py @@ -131,7 +131,7 @@ def get(self, request, *args, **kwargs): source = results[0]['_source'] for field in fields: text_with_entities = source.get(field) - annotations.update({field.replace('_ner', ''): self.find_entities( + annotations.update({field.replace(':ner', ''): self.find_entities( text_with_entities, entity_classes)}) entities = [self.entity_dict.get(entity_class) for entity_class in list(entity_classes)] @@ -142,7 +142,7 @@ def find_named_entity_fields(self, client, index: str) -> list[str]: mapping = client.indices.get_mapping(index=index) fields = mapping[index]['mappings']['properties'] field_names = fields.keys() - return [name for name in field_names if name.endswith('_ner') and fields[name].get('type') == 'annotated_text'] + return [name for name in field_names if name.endswith(':ner') and fields[name].get('type') == 'annotated_text'] def construct_named_entity_query(self, fields: list[str], document_id: str) -> dict: return { From a964d56ab01e2394ea0b6f5771c4219df91464b2 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 27 Jun 2024 10:54:37 +0200 Subject: [PATCH 25/30] fix: use mark class instead of span --- backend/es/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/es/views.py b/backend/es/views.py index 1e2036da5..7bc06cf27 100644 --- a/backend/es/views.py +++ b/backend/es/views.py @@ -180,4 +180,4 @@ def substitute_annotation_with_tag(self, annotation: tuple, input_text: str, ent annotated = annotation[0][1:-1] entity_classes.update({entity_class}) return input_text.replace( - ''.join(annotation), f'{annotated}') + ''.join(annotation), f'{annotated} ') From db4ba62b65bb6bb4a9a0c45137ba0aab745f4299 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Thu, 27 Jun 2024 10:55:19 +0200 Subject: [PATCH 26/30] revertme: use underscore for legacy index --- backend/addcorpus/models.py | 2 +- backend/es/views.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/addcorpus/models.py b/backend/addcorpus/models.py index 84349e205..962943b36 100644 --- a/backend/addcorpus/models.py +++ b/backend/addcorpus/models.py @@ -270,7 +270,7 @@ def has_named_entities(self): index=self.es_index) fields = mapping[self.es_index].get( 'mappings', {}).get('properties', {}).keys() - if any(field.endswith(':ner') for field in fields): + if any(field.endswith('_ner') for field in fields): return True except: return False diff --git a/backend/es/views.py b/backend/es/views.py index 7bc06cf27..b41173ece 100644 --- a/backend/es/views.py +++ b/backend/es/views.py @@ -131,7 +131,7 @@ def get(self, request, *args, **kwargs): source = results[0]['_source'] for field in fields: text_with_entities = source.get(field) - annotations.update({field.replace(':ner', ''): self.find_entities( + annotations.update({field.replace('_ner', ''): self.find_entities( text_with_entities, entity_classes)}) entities = [self.entity_dict.get(entity_class) for entity_class in list(entity_classes)] @@ -142,7 +142,7 @@ def find_named_entity_fields(self, client, index: str) -> list[str]: mapping = client.indices.get_mapping(index=index) fields = mapping[index]['mappings']['properties'] field_names = fields.keys() - return [name for name in field_names if name.endswith(':ner') and fields[name].get('type') == 'annotated_text'] + return [name for name in field_names if name.endswith('_ner') and fields[name].get('type') == 'annotated_text'] def construct_named_entity_query(self, fields: list[str], document_id: str) -> dict: return { From f9ba8a1090af91c4525ea939e2c7b3a922d94cac Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Mon, 1 Jul 2024 18:04:03 +0200 Subject: [PATCH 27/30] implement review suggestions --- backend/es/views.py | 41 +++++++++---------- frontend/src/_utilities.scss | 8 ++-- .../edit-definition.component.ts | 1 - .../document-page.component.html | 5 ++- .../document-page/document-page.component.ts | 13 +++++- .../document-view.component.html | 25 ++++++++--- .../document-view.component.scss | 2 - .../document-view/document-view.component.ts | 3 +- .../document-popup.component.html | 2 +- frontend/src/app/document/document.module.ts | 5 +-- .../entity-legend.component.html | 16 ++++++-- .../entity-legend.component.scss | 14 ++++--- .../entity-legend/entity-legend.component.ts | 27 +++++++----- frontend/src/app/models/found-document.ts | 16 ++++---- frontend/src/app/models/search-results.ts | 7 +++- frontend/src/app/shared/icons.ts | 13 ++++-- .../pipes/elasticsearch-highlight.pipe.ts | 14 ++----- frontend/src/app/shared/pipes/entity.pipe.ts | 25 ----------- .../shared/pipes/format-entity-class.pipe.ts | 20 --------- frontend/src/app/shared/pipes/index.ts | 3 +- .../app/shared/pipes/paragraph.pipe.spec.ts | 8 ++++ .../src/app/shared/pipes/paragraph.pipe.ts | 23 +++++++++++ .../app/shared/toggle/toggle.component.html | 2 +- frontend/src/styles.scss | 35 +++++++++------- 24 files changed, 178 insertions(+), 150 deletions(-) delete mode 100644 frontend/src/app/shared/pipes/entity.pipe.ts delete mode 100644 frontend/src/app/shared/pipes/format-entity-class.pipe.ts create mode 100644 frontend/src/app/shared/pipes/paragraph.pipe.spec.ts create mode 100644 frontend/src/app/shared/pipes/paragraph.pipe.ts diff --git a/backend/es/views.py b/backend/es/views.py index b41173ece..f372be8b9 100644 --- a/backend/es/views.py +++ b/backend/es/views.py @@ -107,10 +107,10 @@ class NamedEntitySearchView(APIView): Perform search via Elasticsearch and reformat the output ''' entity_dict = { - 'PER': 'Person', - 'LOC': 'Location', - 'ORG': 'Organization', - 'MISC': 'Miscellaneous' + 'PER': 'person', + 'LOC': 'location', + 'ORG': 'organization', + 'MISC': 'miscellaneous' } permission_classes = [CorpusAccessPermission] @@ -125,17 +125,14 @@ def get(self, request, *args, **kwargs): response = client.search(index=index, query=query, fields=fields) results = hits(response) annotations = {} - entity_classes = set() response = {} if len(results): source = results[0]['_source'] for field in fields: text_with_entities = source.get(field) annotations.update({field.replace('_ner', ''): self.find_entities( - text_with_entities, entity_classes)}) - entities = [self.entity_dict.get(entity_class) - for entity_class in list(entity_classes)] - response = {'annotations': annotations, 'entities': entities} + text_with_entities)}) + response = {'annotations': annotations} return Response(response) def find_named_entity_fields(self, client, index: str) -> list[str]: @@ -166,18 +163,18 @@ def add_terms(self, fields: list[str]) -> list[dict]: } for field in fields ] - def find_entities(self, input_text: str, entity_classes: set) -> str: + def find_entities(self, input_text: str) -> str: # regex pattern to match annotations of format "[Wally](Person)" and split it into two groups pattern = re.compile('(\[[^]]+\])(\([A-Z]+\))') - annotations = list(set(pattern.findall(input_text))) - for annotation in annotations: - input_text = self.substitute_annotation_with_tag( - annotation, input_text, entity_classes) - return input_text - - def substitute_annotation_with_tag(self, annotation: tuple, input_text: str, entity_classes: set) -> str: - entity_class = annotation[1][1:-1] - annotated = annotation[0][1:-1] - entity_classes.update({entity_class}) - return input_text.replace( - ''.join(annotation), f'{annotated} ') + annotations = pattern.split(input_text) + output = [] + for index, annotation in enumerate(annotations): + if annotation.startswith('('): + continue + elif annotation.startswith('['): + output.append( + {'entity': self.entity_dict.get(annotations[index+1][1:-1]), 'text': annotation[1:-1]}) + else: + if annotation: + output.append({'entity': 'flat', 'text': annotation}) + return output diff --git a/frontend/src/_utilities.scss b/frontend/src/_utilities.scss index 6de7fcc9f..e1f728cf0 100644 --- a/frontend/src/_utilities.scss +++ b/frontend/src/_utilities.scss @@ -30,10 +30,10 @@ $section-padding: 3rem 1.5rem; $boxShadow: 0 2px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px rgba(10, 10, 10, 0.1); $boxShadowHover: 0px 5px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px $primary; -$entity-person: #ADDBE0; -$entity-location: #ADF489; -$entity-organization: #E6D5A8; -$entity-miscellaneous: #EF9AB4; +$entity-person: #303F9F; +$entity-location: #4e8f2d; +$entity-organization: #efb71d; +$entity-miscellaneous: #ee5986; @import "bulma/sass/utilities/_all"; diff --git a/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts index b5c16bba0..262b26286 100644 --- a/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts +++ b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts @@ -4,7 +4,6 @@ import { Subject } from 'rxjs'; import { CorpusDefinition } from '../../models/corpus-definition'; import { ApiService } from '../../services'; import { ActivatedRoute } from '@angular/router'; -import { filter, take } from 'rxjs/operators'; import * as _ from 'lodash'; import { HttpErrorResponse } from '@angular/common/http'; diff --git a/frontend/src/app/document-page/document-page.component.html b/frontend/src/app/document-page/document-page.component.html index 26ebf2ca6..8c2635901 100644 --- a/frontend/src/app/document-page/document-page.component.html +++ b/frontend/src/app/document-page/document-page.component.html @@ -1,7 +1,8 @@
-
+
+ Show named entities
@@ -14,7 +15,7 @@
- +
diff --git a/frontend/src/app/document-page/document-page.component.ts b/frontend/src/app/document-page/document-page.component.ts index 4d44a7d61..488accb81 100644 --- a/frontend/src/app/document-page/document-page.component.ts +++ b/frontend/src/app/document-page/document-page.component.ts @@ -19,10 +19,14 @@ export class DocumentPageComponent implements OnInit { documentId: string; document: FoundDocument; - documentNotFound: boolean; + documentFound: boolean; documentIcons = documentIcons; + showNEROption: boolean; + + showNamedEntities = false; + constructor( private corpusService: CorpusService, private elasticSearchService: ElasticSearchService, @@ -55,6 +59,7 @@ export class DocumentPageComponent implements OnInit { ]).subscribe(([params, corpus]) => { this.corpus = corpus; this.documentId = params['id']; + this.showNEROption = this.corpus.hasNamedEntities; this.getDocument(this.documentId); this.title.setTitle(pageTitle(`Document in ${corpus.title}`)); }); @@ -63,9 +68,13 @@ export class DocumentPageComponent implements OnInit { getDocument(id: string) { this.elasticSearchService.getDocumentById(id, this.corpus).then(document => { this.document = document; - this.documentNotFound = _.isUndefined(this.document); + this.documentFound = !_.isUndefined(this.document); }); } + toggleNER(active: boolean): void { + this.showNamedEntities = active; + } + } diff --git a/frontend/src/app/document-view/document-view.component.html b/frontend/src/app/document-view/document-view.component.html index ce83d7098..e498a86e2 100644 --- a/frontend/src/app/document-view/document-view.component.html +++ b/frontend/src/app/document-view/document-view.component.html @@ -21,7 +21,7 @@
+ [innerHtml]="document.fieldValue(field) | highlight:query:true">
+ [innerHtml]="document.fieldValue(field) | snippet">
+ [innerHtml]="field | elasticsearchHighlight:document | paragraph"> {{field | geoData:document}} @@ -38,17 +38,30 @@
-
+
+
+ + + {{textSegment.text}} + + + + {{textSegment.text | paragraph}} + + +
+ +
+
+
+ [innerHtml]="field | elasticsearchHighlight:document | paragraph">
- diff --git a/frontend/src/app/document-view/document-view.component.scss b/frontend/src/app/document-view/document-view.component.scss index 9fa3f278e..af60ed17b 100644 --- a/frontend/src/app/document-view/document-view.component.scss +++ b/frontend/src/app/document-view/document-view.component.scss @@ -8,5 +8,3 @@ table { color: $primary; } } - - \ No newline at end of file diff --git a/frontend/src/app/document-view/document-view.component.ts b/frontend/src/app/document-view/document-view.component.ts index 930da218e..203c3d38b 100644 --- a/frontend/src/app/document-view/document-view.component.ts +++ b/frontend/src/app/document-view/document-view.component.ts @@ -3,7 +3,7 @@ import { Component, Input, OnChanges, SimpleChanges } from '@angular/core'; import { CorpusField, FoundDocument, Corpus, QueryModel } from '../models/index'; import { DocumentView } from '../models/document-page'; import * as _ from 'lodash'; -import { documentIcons } from '../shared/icons'; +import { documentIcons, entityIcons } from '../shared/icons'; @Component({ selector: 'ia-document-view', @@ -28,6 +28,7 @@ export class DocumentViewComponent implements OnChanges { public showEntities: boolean; documentIcons = documentIcons; + entityIcons = entityIcons; /** active tab on opening */ activeTab: string; diff --git a/frontend/src/app/document/document-popup/document-popup.component.html b/frontend/src/app/document/document-popup/document-popup.component.html index 8f84e345d..705199abb 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.html +++ b/frontend/src/app/document/document-popup/document-popup.component.html @@ -3,7 +3,7 @@ [responsive]="true" [maximizable]="true" [dismissableMask]="true" [draggable]="true" [resizable]="false" [blockScroll]="true"> Document {{document.position}} of {{page.total}} - NER + Show named entities diff --git a/frontend/src/app/document/document.module.ts b/frontend/src/app/document/document.module.ts index 0d52a5ca0..def98c70e 100644 --- a/frontend/src/app/document/document.module.ts +++ b/frontend/src/app/document/document.module.ts @@ -10,7 +10,7 @@ import { DocumentPopupComponent } from './document-popup/document-popup.componen import { DialogModule } from 'primeng/dialog'; import { DocumentPreviewComponent } from './document-preview/document-preview.component'; import { EntityLegendComponent } from './entity-legend/entity-legend.component'; -import { ElasticsearchHighlightPipe, EntityPipe, FormatEntityClassPipe, GeoDataPipe, SnippetPipe } from '../shared/pipes'; +import { ElasticsearchHighlightPipe, GeoDataPipe, ParagraphPipe, SnippetPipe } from '../shared/pipes'; @NgModule({ declarations: [ @@ -21,9 +21,8 @@ import { ElasticsearchHighlightPipe, EntityPipe, FormatEntityClassPipe, GeoDataP DocumentPreviewComponent, ElasticsearchHighlightPipe, EntityLegendComponent, - EntityPipe, - FormatEntityClassPipe, GeoDataPipe, + ParagraphPipe, SnippetPipe ], imports: [ diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.html b/frontend/src/app/document/entity-legend/entity-legend.component.html index 358862263..41f9622f6 100644 --- a/frontend/src/app/document/entity-legend/entity-legend.component.html +++ b/frontend/src/app/document/entity-legend/entity-legend.component.html @@ -1,3 +1,13 @@ -
- {{entity}} -
+
+
    +
  • + + + {{entity.slice(0,1).toUpperCase() + entity.slice(1)}} + +
  • +
+ + No named entities were found in this text. + +
diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.scss b/frontend/src/app/document/entity-legend/entity-legend.component.scss index 7f7c6ef4e..459455661 100644 --- a/frontend/src/app/document/entity-legend/entity-legend.component.scss +++ b/frontend/src/app/document/entity-legend/entity-legend.component.scss @@ -1,6 +1,8 @@ -span.dot { - height: .7rem; - width: .7rem; - margin-right: .4rem; - display: inline-block; -} \ No newline at end of file +ul { + list-style-type: none; + margin-left: 0; +} + +.no-entities-message { + opacity: 0.6; +} diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.ts b/frontend/src/app/document/entity-legend/entity-legend.component.ts index da297a25a..882fcd59c 100644 --- a/frontend/src/app/document/entity-legend/entity-legend.component.ts +++ b/frontend/src/app/document/entity-legend/entity-legend.component.ts @@ -1,21 +1,28 @@ -import { Component, Input, OnInit } from '@angular/core'; +import { Component, Input, OnChanges } from '@angular/core'; +import * as _ from 'lodash'; + +import { entityIcons } from '../../shared/icons'; +import { FieldEntities } from '../../models'; @Component({ selector: 'ia-entity-legend', templateUrl: './entity-legend.component.html', styleUrls: ['./entity-legend.component.scss'] }) -export class EntityLegendComponent implements OnInit { - - @Input() entities: string[]; +export class EntityLegendComponent implements OnChanges { + @Input() entityAnnotations: FieldEntities[]; - constructor() { } + public entityIcons = entityIcons; + public entities: string[]; - ngOnInit(): void { - } + constructor() { } - formatClass(entityName: string): string { - return `entity-${entityName.toLowerCase().slice(0,3)}`; - } + ngOnChanges(): void { + if (!this.entityAnnotations) { + this.entities = null; + } else { + this.entities = _.uniq(this.entityAnnotations.map((item) => item.entity)).filter((value) => value !=='flat'); + } + } } diff --git a/frontend/src/app/models/found-document.ts b/frontend/src/app/models/found-document.ts index 792e7b29e..db83ba9b1 100644 --- a/frontend/src/app/models/found-document.ts +++ b/frontend/src/app/models/found-document.ts @@ -1,4 +1,6 @@ import * as _ from 'lodash'; +import { map, mergeMap, shareReplay, take } from 'rxjs/operators'; + import { makeContextParams } from '../utils/document-context'; import { Corpus, CorpusField } from './corpus'; import { FieldValues, HighlightResult, SearchHit } from './elasticsearch'; @@ -6,7 +8,7 @@ import { Tag } from './tag'; import { Observable, Subject, merge, timer } from 'rxjs'; import { EntityService } from '../services/entity.service'; import { TagService } from '../services/tag.service'; -import { map, mergeMap, shareReplay, take } from 'rxjs/operators'; +import { FieldEntities } from './search-results'; export class FoundDocument { id: string; @@ -29,8 +31,7 @@ export class FoundDocument { tags$: Observable; /** named entities associated with the document */ - entities: string[]; - annotations$: Observable<{[fieldName: string]: string}[]>; + entityAnnotations$: Observable<{[fieldName: string]: FieldEntities[]}>; private tagsChanged$ = new Subject(); constructor( @@ -56,7 +57,7 @@ export class FoundDocument { shareReplay(1), ); - this.annotations$ = created$.pipe( + this.entityAnnotations$ = created$.pipe( mergeMap(() => this.fetchAnnotatedEntities()), shareReplay(1), ); @@ -124,13 +125,10 @@ export class FoundDocument { ); } - private fetchAnnotatedEntities(): Observable<{[fieldName: string]: string}[]> { + private fetchAnnotatedEntities(): Observable<{[fieldName: string]: FieldEntities[]}> { const response$ = this.entityService.getDocumentEntities(this.corpus, this.id); return response$.pipe( - map( response => { - this.entities = response.entities; - return response.annotations || []; - }) + map( response => response.annotations || {} ) ); } diff --git a/frontend/src/app/models/search-results.ts b/frontend/src/app/models/search-results.ts index 6feb3a036..212a001ba 100644 --- a/frontend/src/app/models/search-results.ts +++ b/frontend/src/app/models/search-results.ts @@ -83,9 +83,12 @@ export interface QueryFeedback { similarTerms?: string[]; } +export interface FieldEntities { + [entityType: string] : string +} + export interface NamedEntitiesResult { - entities: string[]; - annotations: {[fieldName: string]: string}[]; + annotations: {[fieldName: string]: FieldEntities[]}; } export interface TaskResult { task_ids: string[] }; diff --git a/frontend/src/app/shared/icons.ts b/frontend/src/app/shared/icons.ts index daefdf6bf..21b16626d 100644 --- a/frontend/src/app/shared/icons.ts +++ b/frontend/src/app/shared/icons.ts @@ -4,9 +4,9 @@ import { } from '@fortawesome/free-regular-svg-icons'; import { IconDefinition as SolidIconDefinition, - faAngleDown, faAngleUp, faArrowLeft, faArrowRight, faAt, faBook, faBookOpen, faChartColumn, - faCheck, faChevronDown, faChevronLeft, faChevronRight, faCog, faCogs, faDatabase, faDiagramProject, - faDownload, faEdit, faEnvelope, faEye, faFilter, faHistory, faImage, faInfo, faInfoCircle, faLink, faList, faLock, + faAngleDown, faAngleUp, faArrowLeft, faArrowRight, faAt, faBook, faBookmark, faBookOpen, faBuilding, faChartColumn, + faCheck, faChevronLeft, faChevronRight, faCog, faCogs, faDatabase, faDiagramProject, + faDownload, faEnvelope, faEye, faFilter, faHistory, faImage, faInfo, faInfoCircle, faLink, faList, faLocationDot, faLock, faMinus, faPalette, faPencil, faPlus, faQuestionCircle, faSearch, faSearchMinus, faSearchPlus, faSignOut, faSortAlphaAsc, faSortAlphaDesc, faSortNumericAsc, faSortNumericDesc, faSquare, faTable, faTags, faTimes, faTrashCan, faUndo, faUpload, faUser @@ -109,3 +109,10 @@ export const documentIcons: Icons = { scanAlt: faNewspaper, context: faBookOpen, }; + +export const entityIcons: Icons = { + person: faUser, + location: faLocationDot, + organization: faBuilding, + miscellaneous: faBookmark, +} diff --git a/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts b/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts index d7dc31375..ae1d374da 100644 --- a/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts +++ b/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts @@ -1,14 +1,12 @@ import { Pipe, PipeTransform } from '@angular/core'; -import { DomSanitizer } from '@angular/platform-browser'; -import { CorpusField, FoundDocument } from '../../models'; import * as _ from 'lodash'; +import { CorpusField, FoundDocument } from '../../models'; + @Pipe({ name: 'elasticsearchHighlight' }) export class ElasticsearchHighlightPipe implements PipeTransform { - constructor(private sanitizer: DomSanitizer) { - } /** * Transforms a text to display highlights fetched from Elasticsearch @@ -23,8 +21,7 @@ export class ElasticsearchHighlightPipe implements PipeTransform { } const highlighted = this.highlightedInnerHtml(field, document); - const paragraphs = this.addParagraphTags(highlighted); - return this.sanitizer.bypassSecurityTrustHtml(paragraphs); + return highlighted; } highlightedInnerHtml(field: CorpusField, document: FoundDocument) { @@ -38,11 +35,6 @@ export class ElasticsearchHighlightPipe implements PipeTransform { return highlighted; } - addParagraphTags(content: string | string[]) { - const paragraphs = typeof content === 'string' ? content.split('\n') : content; - return paragraphs.map(p => `

${p}

`).join(' '); - } - stripTags(htmlString: string){ const parseHTML= new DOMParser().parseFromString(htmlString, 'text/html'); return parseHTML.body.textContent || ''; diff --git a/frontend/src/app/shared/pipes/entity.pipe.ts b/frontend/src/app/shared/pipes/entity.pipe.ts deleted file mode 100644 index ec515884a..000000000 --- a/frontend/src/app/shared/pipes/entity.pipe.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { Pipe, PipeTransform } from '@angular/core'; -import { DomSanitizer } from '@angular/platform-browser'; -import { Observable } from 'rxjs'; - -import { FoundDocument } from '../../models'; -import * as _ from 'lodash'; - -@Pipe({ - name: 'entity' -}) -export class EntityPipe implements PipeTransform { - constructor(private sanitizer: DomSanitizer) { - } - - /** - * Transform field with annotated entities - * - * @param document FoundDocument holding the actual data - */ - transform(annotations$: Observable<{[fieldName: string]: string}[]>, document: FoundDocument, fieldName: string) { - const newText = annotations$[fieldName]; - return newText || document.fieldValues[fieldName]; - } - -} diff --git a/frontend/src/app/shared/pipes/format-entity-class.pipe.ts b/frontend/src/app/shared/pipes/format-entity-class.pipe.ts deleted file mode 100644 index 11f935005..000000000 --- a/frontend/src/app/shared/pipes/format-entity-class.pipe.ts +++ /dev/null @@ -1,20 +0,0 @@ - -import { Pipe, PipeTransform } from '@angular/core'; -import * as _ from 'lodash'; -@Pipe({ - name: 'formatEntityClass' -}) -export class FormatEntityClassPipe implements PipeTransform { - constructor() { - } - - /** - * Transform field with annotated entities - * - * @param document FoundDocument holding the actual data - */ - transform(entityName: string) { - return `entity-${entityName.toLowerCase().slice(0,3)}`; - } - -} diff --git a/frontend/src/app/shared/pipes/index.ts b/frontend/src/app/shared/pipes/index.ts index d9e21dbdc..0bf4c0f5b 100644 --- a/frontend/src/app/shared/pipes/index.ts +++ b/frontend/src/app/shared/pipes/index.ts @@ -1,6 +1,5 @@ export * from './elasticsearch-highlight.pipe'; -export * from './entity.pipe'; -export * from './format-entity-class.pipe'; export * from './geo-data.pipe'; +export * from './paragraph.pipe'; export * from './regex-highlight.pipe'; export * from './snippet.pipe'; diff --git a/frontend/src/app/shared/pipes/paragraph.pipe.spec.ts b/frontend/src/app/shared/pipes/paragraph.pipe.spec.ts new file mode 100644 index 000000000..10f3ec4c0 --- /dev/null +++ b/frontend/src/app/shared/pipes/paragraph.pipe.spec.ts @@ -0,0 +1,8 @@ +import { ParagraphPipe } from './paragraph.pipe'; + +describe('ParagraphPipe', () => { + it('create an instance', () => { + const pipe = new ParagraphPipe(); + expect(pipe).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/shared/pipes/paragraph.pipe.ts b/frontend/src/app/shared/pipes/paragraph.pipe.ts new file mode 100644 index 000000000..877d6fac2 --- /dev/null +++ b/frontend/src/app/shared/pipes/paragraph.pipe.ts @@ -0,0 +1,23 @@ +import { Pipe, PipeTransform } from '@angular/core'; + + +@Pipe({ + name: 'paragraph', +}) +export class ParagraphPipe implements PipeTransform { + + transform(content: string | string[]): unknown { + const splitText = this.addParagraphTags(content); + return splitText; + } + + addParagraphTags(content: string | string[]) { + const paragraphs = typeof content === 'string' ? content.split('\n') : content; + if (paragraphs.length == 1) { + return content; + } + return paragraphs.map(p => `

${p}

`).join(' '); + } + + +} diff --git a/frontend/src/app/shared/toggle/toggle.component.html b/frontend/src/app/shared/toggle/toggle.component.html index 64fafd1de..8b855a255 100644 --- a/frontend/src/app/shared/toggle/toggle.component.html +++ b/frontend/src/app/shared/toggle/toggle.component.html @@ -1,4 +1,4 @@
-
\ No newline at end of file +
diff --git a/frontend/src/styles.scss b/frontend/src/styles.scss index 3b13bf5f7..942fe4314 100644 --- a/frontend/src/styles.scss +++ b/frontend/src/styles.scss @@ -72,23 +72,30 @@ a.dropdown-item[disabled] { } } -span[class^='entity'] { - padding-left: .4em; +@mixin mark-entity($color) { + background-color: rgb(from $color r g b /.2); + border-bottom: .2em solid; + border-color: $color; + + .entity-icon, &.entity-icon { + padding-left: .3em; + padding-right: .3em; + color: $color; + } } -.entity-per { - background-color: $entity-person; - clip-path: polygon(100% 0%, 100% 100%, 0% 100%, 5px 50%, 0% 0%); +.entity-person { + @include mark-entity($entity-person); } -.entity-loc { - background-color: $entity-location; - clip-path: polygon(5px 0%, 100% 0%, 100% 100%, 5px 100%, 0% 50%); + +.entity-location { + @include mark-entity($entity-location); } -.entity-org { - background-color: $entity-organization; - clip-path: polygon(0 0, 100% 0%, 100% 100%, 5px 100%); + +.entity-organization { + @include mark-entity($entity-organization); } -.entity-mis { - background-color: $entity-miscellaneous; - clip-path: polygon(5px 0, 100% 0%, 100% 100%, 0% 100%); + +.entity-miscellaneous { + @include mark-entity($entity-miscellaneous); } From 0ba02ebc642b8414c065a38930702f9c7bd20c51 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Mon, 1 Jul 2024 18:05:58 +0200 Subject: [PATCH 28/30] Revert "revertme: use underscore for legacy index" This reverts commit db4ba62b65bb6bb4a9a0c45137ba0aab745f4299. --- backend/addcorpus/models.py | 2 +- backend/es/views.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/addcorpus/models.py b/backend/addcorpus/models.py index 962943b36..84349e205 100644 --- a/backend/addcorpus/models.py +++ b/backend/addcorpus/models.py @@ -270,7 +270,7 @@ def has_named_entities(self): index=self.es_index) fields = mapping[self.es_index].get( 'mappings', {}).get('properties', {}).keys() - if any(field.endswith('_ner') for field in fields): + if any(field.endswith(':ner') for field in fields): return True except: return False diff --git a/backend/es/views.py b/backend/es/views.py index f372be8b9..355b86bf2 100644 --- a/backend/es/views.py +++ b/backend/es/views.py @@ -130,7 +130,7 @@ def get(self, request, *args, **kwargs): source = results[0]['_source'] for field in fields: text_with_entities = source.get(field) - annotations.update({field.replace('_ner', ''): self.find_entities( + annotations.update({field.replace(':ner', ''): self.find_entities( text_with_entities)}) response = {'annotations': annotations} return Response(response) @@ -139,7 +139,7 @@ def find_named_entity_fields(self, client, index: str) -> list[str]: mapping = client.indices.get_mapping(index=index) fields = mapping[index]['mappings']['properties'] field_names = fields.keys() - return [name for name in field_names if name.endswith('_ner') and fields[name].get('type') == 'annotated_text'] + return [name for name in field_names if name.endswith(':ner') and fields[name].get('type') == 'annotated_text'] def construct_named_entity_query(self, fields: list[str], document_id: str) -> dict: return { From ee87449b85646fd3eea97448441e600def75af96 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 3 Jul 2024 10:04:17 +0200 Subject: [PATCH 29/30] fix backend test --- backend/es/tests/test_named_entity_search.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/backend/es/tests/test_named_entity_search.py b/backend/es/tests/test_named_entity_search.py index 3a58d0cfc..1335c63b8 100644 --- a/backend/es/tests/test_named_entity_search.py +++ b/backend/es/tests/test_named_entity_search.py @@ -41,9 +41,8 @@ def test_find_named_entity_fields(es_ner_search_client): def test_find_entities(): viewset = NamedEntitySearchView() text = '[Guybrush Threepwood](PER) is looking for treasure on [Monkey Island](LOC)' - entity_classes = set() - output = viewset.find_entities(text, entity_classes) - expected = 'Guybrush Threepwood is looking for treasure on Monkey Island' + output = viewset.find_entities(text) + expected = [{'entity': 'person', 'text': 'Guybrush Threepwood'}, + {'entity': 'flat', 'text': ' is looking for treasure on '}, + {'entity': 'location', 'text': 'Monkey Island'}] assert output == expected - assert len(list(entity_classes)) == 2 - assert all(entity in list(entity_classes) for entity in ['PER', 'LOC']) From 7c92ce247324cae06ab8404f67703aa3f7530f7c Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Wed, 3 Jul 2024 11:52:28 +0200 Subject: [PATCH 30/30] fix frontend unit tests --- backend/es/views.py | 3 +-- frontend/src/app/common-test-bed.ts | 2 ++ frontend/src/app/document-view/document-view.component.html | 2 +- .../src/app/document-view/document-view.component.spec.ts | 4 +++- frontend/src/app/document/document.module.ts | 3 ++- frontend/src/app/models/found-document.ts | 5 +---- frontend/src/app/models/search-results.ts | 2 +- frontend/src/app/shared/pipes/paragraph.pipe.ts | 2 +- frontend/src/mock-data/entity.ts | 4 +++- 9 files changed, 15 insertions(+), 12 deletions(-) diff --git a/backend/es/views.py b/backend/es/views.py index 355b86bf2..55e36ed7c 100644 --- a/backend/es/views.py +++ b/backend/es/views.py @@ -132,8 +132,7 @@ def get(self, request, *args, **kwargs): text_with_entities = source.get(field) annotations.update({field.replace(':ner', ''): self.find_entities( text_with_entities)}) - response = {'annotations': annotations} - return Response(response) + return Response(annotations) def find_named_entity_fields(self, client, index: str) -> list[str]: mapping = client.indices.get_mapping(index=index) diff --git a/frontend/src/app/common-test-bed.ts b/frontend/src/app/common-test-bed.ts index eecd514e4..888762621 100644 --- a/frontend/src/app/common-test-bed.ts +++ b/frontend/src/app/common-test-bed.ts @@ -2,6 +2,7 @@ import { TestBed } from '@angular/core/testing'; import { ElementRef } from '@angular/core'; import { RouterTestingModule } from '@angular/router/testing'; import { HttpClientModule } from '@angular/common/http'; +import {FontAwesomeTestingModule} from '@fortawesome/angular-fontawesome/testing'; import { appRoutes, declarations, imports, providers } from './app.module'; @@ -28,6 +29,7 @@ import { SimpleStore } from './store/simple-store'; export const commonTestBed = () => { const filteredImports = imports.filter(value => !(value in [HttpClientModule])); filteredImports.push(RouterTestingModule.withRoutes(appRoutes)); + filteredImports.push(FontAwesomeTestingModule) const filteredProviders = providers.filter(provider => !( provider in [ApiService, CorpusService, DialogService, ElasticSearchService, SearchService])); filteredProviders.push( diff --git a/frontend/src/app/document-view/document-view.component.html b/frontend/src/app/document-view/document-view.component.html index e498a86e2..26c464270 100644 --- a/frontend/src/app/document-view/document-view.component.html +++ b/frontend/src/app/document-view/document-view.component.html @@ -44,7 +44,7 @@ {{textSegment.text}} - + {{textSegment.text | paragraph}} diff --git a/frontend/src/app/document-view/document-view.component.spec.ts b/frontend/src/app/document-view/document-view.component.spec.ts index 703550fc7..9f1e799ee 100644 --- a/frontend/src/app/document-view/document-view.component.spec.ts +++ b/frontend/src/app/document-view/document-view.component.spec.ts @@ -45,10 +45,12 @@ describe('DocumentViewComponent', () => { expect(debug[1].attributes['id']).toBe('tab-scan'); }); - it('shows a named entity legend if showEntities is true', () => { + it('shows named entities if showEntities is true', async () => { expect(fixture.debugElement.query(By.css('ia-entity-legend'))).toBeFalsy(); component.showEntities = true; fixture.detectChanges(); + await fixture.whenStable(); + fixture.detectChanges(); expect(fixture.debugElement.query(By.css('ia-entity-legend'))).toBeTruthy(); }); diff --git a/frontend/src/app/document/document.module.ts b/frontend/src/app/document/document.module.ts index def98c70e..67822bcc8 100644 --- a/frontend/src/app/document/document.module.ts +++ b/frontend/src/app/document/document.module.ts @@ -19,8 +19,8 @@ import { ElasticsearchHighlightPipe, GeoDataPipe, ParagraphPipe, SnippetPipe } f SearchRelevanceComponent, DocumentPopupComponent, DocumentPreviewComponent, - ElasticsearchHighlightPipe, EntityLegendComponent, + ElasticsearchHighlightPipe, GeoDataPipe, ParagraphPipe, SnippetPipe @@ -36,6 +36,7 @@ import { ElasticsearchHighlightPipe, GeoDataPipe, ParagraphPipe, SnippetPipe } f DocumentViewComponent, DocumentPageComponent, DocumentPopupComponent, + EntityLegendComponent, SearchRelevanceComponent, ] }) diff --git a/frontend/src/app/models/found-document.ts b/frontend/src/app/models/found-document.ts index db83ba9b1..6fcbcb134 100644 --- a/frontend/src/app/models/found-document.ts +++ b/frontend/src/app/models/found-document.ts @@ -126,10 +126,7 @@ export class FoundDocument { } private fetchAnnotatedEntities(): Observable<{[fieldName: string]: FieldEntities[]}> { - const response$ = this.entityService.getDocumentEntities(this.corpus, this.id); - return response$.pipe( - map( response => response.annotations || {} ) - ); + return this.entityService.getDocumentEntities(this.corpus, this.id); } private setTags(tags: Tag[]): Observable { diff --git a/frontend/src/app/models/search-results.ts b/frontend/src/app/models/search-results.ts index 212a001ba..1eac5b1ff 100644 --- a/frontend/src/app/models/search-results.ts +++ b/frontend/src/app/models/search-results.ts @@ -88,7 +88,7 @@ export interface FieldEntities { } export interface NamedEntitiesResult { - annotations: {[fieldName: string]: FieldEntities[]}; + [fieldName: string]: FieldEntities[] } export interface TaskResult { task_ids: string[] }; diff --git a/frontend/src/app/shared/pipes/paragraph.pipe.ts b/frontend/src/app/shared/pipes/paragraph.pipe.ts index 877d6fac2..eebafc51a 100644 --- a/frontend/src/app/shared/pipes/paragraph.pipe.ts +++ b/frontend/src/app/shared/pipes/paragraph.pipe.ts @@ -13,7 +13,7 @@ export class ParagraphPipe implements PipeTransform { addParagraphTags(content: string | string[]) { const paragraphs = typeof content === 'string' ? content.split('\n') : content; - if (paragraphs.length == 1) { + if (!paragraphs || paragraphs.length === 1) { return content; } return paragraphs.map(p => `

${p}

`).join(' '); diff --git a/frontend/src/mock-data/entity.ts b/frontend/src/mock-data/entity.ts index 3fefbfc25..9452852ec 100644 --- a/frontend/src/mock-data/entity.ts +++ b/frontend/src/mock-data/entity.ts @@ -5,6 +5,8 @@ import { Corpus, NamedEntitiesResult } from '../app/models'; export class EntityServiceMock { public getDocumentEntities(corpus: Corpus, id: string): Observable { - return of({annotations: [{'content': 'Wally was last seen in Paris'}], entities: ['location', 'person']}) + return of({speech: [{entity: 'person', text: 'Wally'}, + {entity: 'flat', text: ' was last seen in '}, + {entity: 'location', text: 'Paris'}]}) } }