diff --git a/backend/Dockerfile b/backend/Dockerfile index 3548587c4..2c58b766e 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -3,7 +3,7 @@ FROM docker.io/library/python:3.9 # Setting this means stdout and stderr streams are sent to terminal in real time ENV PYTHONUNBUFFERED 1 # Get required libraries for xmlsec -RUN apt-get -y update +RUN apt-get -y update && apt-get -y upgrade RUN apt-get install -y pkg-config libxml2-dev libxmlsec1-dev libxmlsec1-openssl default-libmysqlclient-dev RUN pip install --upgrade pip diff --git a/backend/addcorpus/models.py b/backend/addcorpus/models.py index 91dae899d..2d6c8927a 100644 --- a/backend/addcorpus/models.py +++ b/backend/addcorpus/models.py @@ -4,8 +4,8 @@ from addcorpus.validation.creation import ( validate_es_mapping, validate_field_language, validate_implication, validate_language_code, validate_mimetype, - validate_name_is_not_a_route_parameter, validate_search_filter, - validate_search_filter_with_mapping, + validate_name_is_not_a_route_parameter, validate_name_has_no_ner_suffix, + validate_search_filter, validate_search_filter_with_mapping, validate_searchable_field_has_full_text_search, validate_sort_configuration, validate_visualizations_with_mapping, validate_source_data_directory, @@ -21,6 +21,8 @@ from django.db import models from django.db.models.constraints import UniqueConstraint +from ianalyzer.elasticsearch import elasticsearch + MAX_LENGTH_NAME = 126 MAX_LENGTH_DESCRIPTION = 254 MAX_LENGTH_TITLE = 256 @@ -260,6 +262,20 @@ def clean(self): e ]) + @property + def has_named_entities(self): + client = elasticsearch(self.es_index) + try: + mapping = client.indices.get_mapping( + index=self.es_index) + fields = mapping[self.es_index].get( + 'mappings', {}).get('properties', {}).keys() + if any(field.endswith(':ner') for field in fields): + return True + except: + return False + return False + FIELD_DISPLAY_TYPES = [ ('text_content', 'text content'), @@ -293,7 +309,8 @@ def clean(self): class 
Field(models.Model): name = models.SlugField( max_length=MAX_LENGTH_NAME, - validators=[validate_name_is_not_a_route_parameter], + validators=[validate_name_is_not_a_route_parameter, + validate_name_has_no_ner_suffix], help_text='internal name for the field', ) corpus_configuration = models.ForeignKey( diff --git a/backend/addcorpus/serializers.py b/backend/addcorpus/serializers.py index b124cd735..b350656a3 100644 --- a/backend/addcorpus/serializers.py +++ b/backend/addcorpus/serializers.py @@ -70,6 +70,7 @@ class CorpusConfigurationSerializer(serializers.ModelSerializer): languages = serializers.ListField(child=LanguageField()) category = PrettyChoiceField(choices=CATEGORIES) default_sort = NonEmptyJSONField() + has_named_entities = serializers.ReadOnlyField() class Meta: model = CorpusConfiguration @@ -89,6 +90,7 @@ class Meta: 'default_sort', 'language_field', 'fields', + 'has_named_entities', ] diff --git a/backend/addcorpus/validation/creation.py b/backend/addcorpus/validation/creation.py index 8b0a2666a..445a272fa 100644 --- a/backend/addcorpus/validation/creation.py +++ b/backend/addcorpus/validation/creation.py @@ -122,6 +122,13 @@ def validate_name_is_not_a_route_parameter(value): f'{value} cannot be used as a field name, because it is also a route parameter' ) + +def validate_name_has_no_ner_suffix(value): + if value.endswith(':ner'): + raise ValidationError( + f'{value} cannot be used as a field name: the suffix `:ner` is reserved for annotated_text fields' + ) + def mapping_can_be_searched(es_mapping): ''' Verify if a mapping is appropriate for searching diff --git a/backend/corpora/jewishmigration/jewishmigration.py b/backend/corpora/jewishmigration/jewishmigration.py index 24fa4486e..02fd3c8a3 100644 --- a/backend/corpora/jewishmigration/jewishmigration.py +++ b/backend/corpora/jewishmigration/jewishmigration.py @@ -48,9 +48,9 @@ class JewishMigration(PeacePortal, JSONCorpusDefinition): min_date = datetime(year=1, month=1, day=1) max_date = 
datetime(year=1800, month=12, day=31) - data_directory = getattr(settings, 'JMIG_DATA') - data_url = getattr(settings, 'JMIG_DATA_URL', - 'localhost:8100/api/records/') + data_directory = settings.JMIG_DATA_DIR + data_filepath = getattr(settings, 'JMIG_DATA', None) + data_url = getattr(settings, 'JMIG_DATA_URL', None) es_index = getattr(settings, 'JMIG_INDEX', 'jewishmigration') image = 'jewish_inscriptions.jpg' @@ -62,12 +62,12 @@ def sources(self, start, end): if self.data_url: response = requests.get(self.data_url) list_of_sources = response.json() - elif self.data_directory: - with open(self.data_directory, 'r') as f: + elif self.data_filepath: + with open(self.data_filepath, 'r') as f: list_of_sources = json.load(f) else: logging.getLogger('indexing').warning( - 'No data directory or URL provided.') + 'No data filepath or URL provided.') for source in list_of_sources: yield source diff --git a/backend/corpora/jewishmigration/test_jewishmigration.py b/backend/corpora/jewishmigration/test_jewishmigration.py index cec55f18a..d1dc7acdd 100644 --- a/backend/corpora/jewishmigration/test_jewishmigration.py +++ b/backend/corpora/jewishmigration/test_jewishmigration.py @@ -136,6 +136,7 @@ def jm_corpus_settings(settings): settings.CORPORA = { 'jewishmigration': os.path.join(here, 'jewishmigration.py') } + settings.JMIG_DATA_DIR = '/corpora' settings.JMIG_DATA = None settings.JMIG_DATA_URL = 'http://www.example.com' settings.JMIG_INDEX = 'test-jewishmigration' diff --git a/backend/es/conftest.py b/backend/es/conftest.py index c9272b917..40d462e05 100644 --- a/backend/es/conftest.py +++ b/backend/es/conftest.py @@ -1,4 +1,6 @@ import pytest +from time import sleep + from django.contrib.auth.models import Group from addcorpus.python_corpora.load_corpus import load_corpus_definition @@ -17,6 +19,27 @@ def corpus_definition(mock_corpus): yield corpus +@pytest.fixture() +def es_ner_search_client(es_client, basic_mock_corpus, basic_corpus_public, index_basic_mock_corpus): + 
""" + Create and populate an index for the mock corpus in elasticsearch. + Returns an elastic search client for the mock corpus. + """ + # add data from mock corpus + corpus = Corpus.objects.get(name=basic_mock_corpus) + es_client.indices.put_mapping(index=corpus.configuration.es_index, properties={ + "content:ner": {"type": "annotated_text"}}) + + es_client.index(index=corpus.configuration.es_index, document={ + 'id': 'my_identifier', + 'content': 'Guybrush Threepwood is looking for treasure on Monkey Island', + 'content:ner': '[Guybrush Threepwood](PER) is looking for treasure on [Monkey Island](LOC)'}) + + # ES is "near real time", so give it a second before we start searching the index + sleep(1) + yield es_client + + @pytest.fixture() def es_index_client(es_client, mock_corpus): """ diff --git a/backend/es/tests/test_named_entity_search.py b/backend/es/tests/test_named_entity_search.py new file mode 100644 index 000000000..1335c63b8 --- /dev/null +++ b/backend/es/tests/test_named_entity_search.py @@ -0,0 +1,48 @@ +from es.views import NamedEntitySearchView + + +def test_ner_search_view(es_ner_search_client, client): + route = '/api/es/mock-csv-corpus/my_identifier/named_entities' + response = client.get(route, content_type='application/json') + assert response.status_code == 200 + + +def test_construct_ner_query(): + viewset = NamedEntitySearchView() + fields = ['content:ner'] + query = viewset.construct_named_entity_query(fields, 'my_identifier') + expected = { + "bool": { + "must": [ + { + "term": { + "id": "my_identifier" + } + }, + { + "terms": { + "content:ner": ["LOC", "PER", "ORG", "MISC"] + } + } + ] + } + } + assert query == expected + + +def test_find_named_entity_fields(es_ner_search_client): + viewset = NamedEntitySearchView() + fields = viewset.find_named_entity_fields( + es_ner_search_client, 'test-basic-corpus') + assert len(fields) == 1 + assert fields[0] == 'content:ner' + + +def test_find_entities(): + viewset = NamedEntitySearchView() + text 
= '[Guybrush Threepwood](PER) is looking for treasure on [Monkey Island](LOC)' + output = viewset.find_entities(text) + expected = [{'entity': 'person', 'text': 'Guybrush Threepwood'}, + {'entity': 'flat', 'text': ' is looking for treasure on '}, + {'entity': 'location', 'text': 'Monkey Island'}] + assert output == expected diff --git a/backend/es/urls.py b/backend/es/urls.py index 62c389eae..f7ab60884 100644 --- a/backend/es/urls.py +++ b/backend/es/urls.py @@ -1,6 +1,8 @@ from django.urls import path -from es.views import * +from es.views import ForwardSearchView, NamedEntitySearchView urlpatterns = [ path('/_search', ForwardSearchView.as_view()), + path('//named_entities', + NamedEntitySearchView.as_view()) ] diff --git a/backend/es/views.py b/backend/es/views.py index ab7cadc2e..55e36ed7c 100644 --- a/backend/es/views.py +++ b/backend/es/views.py @@ -1,17 +1,19 @@ +import logging +import re + from django.utils import timezone from rest_framework.views import APIView from rest_framework.response import Response -from ianalyzer.elasticsearch import elasticsearch -from es.search import get_index, total_hits, hits -import logging -from rest_framework.permissions import IsAuthenticated from rest_framework.exceptions import APIException + from addcorpus.permissions import CorpusAccessPermission -from tag.permissions import CanSearchTags from api.save_query import should_save_query from addcorpus.models import Corpus from api.models import Query from api.api_query import api_query_to_es_query +from es.search import get_index, total_hits, hits +from ianalyzer.elasticsearch import elasticsearch +from tag.permissions import CanSearchTags logger = logging.getLogger(__name__) @@ -98,3 +100,80 @@ def _save_query_done(self, query, results): query.total_results = total_hits(results) query.transferred = len(hits(results)) query.save() + + +class NamedEntitySearchView(APIView): + ''' Construct a terms query for named entities, combined with a term query of the id + Perform 
search via Elasticsearch and reformat the output + ''' + entity_dict = { + 'PER': 'person', + 'LOC': 'location', + 'ORG': 'organization', + 'MISC': 'miscellaneous' + } + + permission_classes = [CorpusAccessPermission] + + def get(self, request, *args, **kwargs): + corpus_name = kwargs.get('corpus') + document_id = kwargs.get('id') + client = elasticsearch(corpus_name) + index = get_index(corpus_name) + fields = self.find_named_entity_fields(client, index) + query = self.construct_named_entity_query(fields, document_id) + response = client.search(index=index, query=query, fields=fields) + results = hits(response) + annotations = {} + response = {} + if len(results): + source = results[0]['_source'] + for field in fields: + text_with_entities = source.get(field) + annotations.update({field.replace(':ner', ''): self.find_entities( + text_with_entities)}) + return Response(annotations) + + def find_named_entity_fields(self, client, index: str) -> list[str]: + mapping = client.indices.get_mapping(index=index) + fields = mapping[index]['mappings']['properties'] + field_names = fields.keys() + return [name for name in field_names if name.endswith(':ner') and fields[name].get('type') == 'annotated_text'] + + def construct_named_entity_query(self, fields: list[str], document_id: str) -> dict: + return { + "bool": { + "must": [ + { + "term": { + "id": document_id + } + }, *self.add_terms(fields) + ] + } + } + + def add_terms(self, fields: list[str]) -> list[dict]: + return [ + { + "terms": { + field: ["LOC", "PER", "ORG", "MISC"] + } + } for field in fields + ] + + def find_entities(self, input_text: str) -> str: + # regex pattern to match annotations of format "[Wally](Person)" and split it into two groups + pattern = re.compile('(\[[^]]+\])(\([A-Z]+\))') + annotations = pattern.split(input_text) + output = [] + for index, annotation in enumerate(annotations): + if annotation.startswith('('): + continue + elif annotation.startswith('['): + output.append( + {'entity': 
self.entity_dict.get(annotations[index+1][1:-1]), 'text': annotation[1:-1]}) + else: + if annotation: + output.append({'entity': 'flat', 'text': annotation}) + return output diff --git a/backend/wordmodels/views.py b/backend/wordmodels/views.py index 81b2de6a1..1c4d28ac3 100644 --- a/backend/wordmodels/views.py +++ b/backend/wordmodels/views.py @@ -1,7 +1,5 @@ -from django.shortcuts import render from rest_framework.views import APIView from rest_framework.response import Response -from rest_framework.permissions import IsAuthenticated from addcorpus.permissions import CorpusAccessPermission, corpus_name_from_request from wordmodels import utils, visualisations from rest_framework.exceptions import APIException @@ -11,7 +9,7 @@ class RelatedWordsView(APIView): Get words with the highest similarity to the query term ''' - permission_classes = [IsAuthenticated, CorpusAccessPermission] + permission_classes = [CorpusAccessPermission] def post(self, request, *args, **kwargs): corpus = corpus_name_from_request(request) @@ -35,7 +33,7 @@ class SimilarityView(APIView): Get similarity between two query terms ''' - permission_classes = [IsAuthenticated, CorpusAccessPermission] + permission_classes = [CorpusAccessPermission] def get(self, request, *args, **kwargs): corpus = corpus_name_from_request(request) @@ -55,7 +53,7 @@ class WordInModelView(APIView): Check if a word has a vector in the model for a corpus ''' - permission_classes = [IsAuthenticated, CorpusAccessPermission] + permission_classes = [CorpusAccessPermission] def get(self, request, *args, **kwargs): corpus = corpus_name_from_request(request) diff --git a/frontend/src/_utilities.scss b/frontend/src/_utilities.scss index d11b74d18..e1f728cf0 100644 --- a/frontend/src/_utilities.scss +++ b/frontend/src/_utilities.scss @@ -30,6 +30,11 @@ $section-padding: 3rem 1.5rem; $boxShadow: 0 2px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px rgba(10, 10, 10, 0.1); $boxShadowHover: 0px 5px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px 
$primary; +$entity-person: #303F9F; +$entity-location: #4e8f2d; +$entity-organization: #efb71d; +$entity-miscellaneous: #ee5986; + @import "bulma/sass/utilities/_all"; // based on the Bulma loader diff --git a/frontend/src/app/common-test-bed.ts b/frontend/src/app/common-test-bed.ts index 25de1b887..888762621 100644 --- a/frontend/src/app/common-test-bed.ts +++ b/frontend/src/app/common-test-bed.ts @@ -2,6 +2,7 @@ import { TestBed } from '@angular/core/testing'; import { ElementRef } from '@angular/core'; import { RouterTestingModule } from '@angular/router/testing'; import { HttpClientModule } from '@angular/common/http'; +import {FontAwesomeTestingModule} from '@fortawesome/angular-fontawesome/testing'; import { appRoutes, declarations, imports, providers } from './app.module'; @@ -10,9 +11,12 @@ import { AuthServiceMock } from '../mock-data/auth'; import { CorpusServiceMock } from '../mock-data/corpus'; import { DialogServiceMock } from '../mock-data/dialog'; import { ElasticSearchServiceMock } from '../mock-data/elastic-search'; +import { EntityServiceMock } from '../mock-data/entity'; import { MockCorpusResponse } from '../mock-data/corpus-response'; import { SearchServiceMock } from '../mock-data/search'; -import { ApiService, AuthService, CorpusService, DialogService, ElasticSearchService, SearchService } from './services'; +import { ApiService, AuthService, CorpusService, DialogService, SearchService } from './services'; +import { ElasticSearchService } from './services/elastic-search.service'; +import { EntityService } from './services/entity.service'; import { WordmodelsService } from './services/wordmodels.service'; import { WordmodelsServiceMock } from '../mock-data/wordmodels'; import { VisualizationService } from './services/visualization.service'; @@ -25,6 +29,7 @@ import { SimpleStore } from './store/simple-store'; export const commonTestBed = () => { const filteredImports = imports.filter(value => !(value in [HttpClientModule])); 
filteredImports.push(RouterTestingModule.withRoutes(appRoutes)); + filteredImports.push(FontAwesomeTestingModule) const filteredProviders = providers.filter(provider => !( provider in [ApiService, CorpusService, DialogService, ElasticSearchService, SearchService])); filteredProviders.push( @@ -49,6 +54,10 @@ export const commonTestBed = () => { provide: ElasticSearchService, useValue: new ElasticSearchServiceMock(), }, + { + provide: EntityService, + useValue: new EntityServiceMock(), + }, { provide: ElementRef, useClass: MockElementRef, diff --git a/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts index b5c16bba0..262b26286 100644 --- a/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts +++ b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts @@ -4,7 +4,6 @@ import { Subject } from 'rxjs'; import { CorpusDefinition } from '../../models/corpus-definition'; import { ApiService } from '../../services'; import { ActivatedRoute } from '@angular/router'; -import { filter, take } from 'rxjs/operators'; import * as _ from 'lodash'; import { HttpErrorResponse } from '@angular/common/http'; diff --git a/frontend/src/app/document-page/document-page.component.html b/frontend/src/app/document-page/document-page.component.html index 26ebf2ca6..8c2635901 100644 --- a/frontend/src/app/document-page/document-page.component.html +++ b/frontend/src/app/document-page/document-page.component.html @@ -1,7 +1,8 @@
-
+
+ Show named entities
@@ -14,7 +15,7 @@
- +
diff --git a/frontend/src/app/document-page/document-page.component.ts b/frontend/src/app/document-page/document-page.component.ts index 4d44a7d61..488accb81 100644 --- a/frontend/src/app/document-page/document-page.component.ts +++ b/frontend/src/app/document-page/document-page.component.ts @@ -19,10 +19,14 @@ export class DocumentPageComponent implements OnInit { documentId: string; document: FoundDocument; - documentNotFound: boolean; + documentFound: boolean; documentIcons = documentIcons; + showNEROption: boolean; + + showNamedEntities = false; + constructor( private corpusService: CorpusService, private elasticSearchService: ElasticSearchService, @@ -55,6 +59,7 @@ export class DocumentPageComponent implements OnInit { ]).subscribe(([params, corpus]) => { this.corpus = corpus; this.documentId = params['id']; + this.showNEROption = this.corpus.hasNamedEntities; this.getDocument(this.documentId); this.title.setTitle(pageTitle(`Document in ${corpus.title}`)); }); @@ -63,9 +68,13 @@ export class DocumentPageComponent implements OnInit { getDocument(id: string) { this.elasticSearchService.getDocumentById(id, this.corpus).then(document => { this.document = document; - this.documentNotFound = _.isUndefined(this.document); + this.documentFound = !_.isUndefined(this.document); }); } + toggleNER(active: boolean): void { + this.showNamedEntities = active; + } + } diff --git a/frontend/src/app/document-view/document-view.component.html b/frontend/src/app/document-view/document-view.component.html index 8f0fc9078..26c464270 100644 --- a/frontend/src/app/document-view/document-view.component.html +++ b/frontend/src/app/document-view/document-view.component.html @@ -9,7 +9,7 @@ - + Your tags @@ -21,9 +21,9 @@ + [innerHtml]="field | elasticsearchHighlight:document | paragraph"> - {{displayGeoPointField(field)}} + {{field | geoData:document}} {{document.fieldValue(field)}} @@ -38,10 +38,30 @@
-
+
+
+ + + {{textSegment.text}} + + + + {{textSegment.text | paragraph}} + + +
+ +
+
+
+ +
+
+
diff --git a/frontend/src/app/document-view/document-view.component.scss b/frontend/src/app/document-view/document-view.component.scss index 9fa3f278e..af60ed17b 100644 --- a/frontend/src/app/document-view/document-view.component.scss +++ b/frontend/src/app/document-view/document-view.component.scss @@ -8,5 +8,3 @@ table { color: $primary; } } - - \ No newline at end of file diff --git a/frontend/src/app/document-view/document-view.component.spec.ts b/frontend/src/app/document-view/document-view.component.spec.ts index f59858f51..9f1e799ee 100644 --- a/frontend/src/app/document-view/document-view.component.spec.ts +++ b/frontend/src/app/document-view/document-view.component.spec.ts @@ -20,10 +20,9 @@ describe('DocumentViewComponent', () => { fixture = TestBed.createComponent(DocumentViewComponent); component = fixture.componentInstance; component.corpus = _.merge({ - scanImageType: 'farout_image_type', - fields: [mockField] + scanImageType: 'farout_image_type' }, mockCorpus); - component.document = makeDocument({ great_field: 'Hello world!' 
}); + component.document = makeDocument({ great_field: 'Hello world!', speech: 'Wally was last seen in Paris' }); fixture.detectChanges(); }); @@ -31,11 +30,8 @@ describe('DocumentViewComponent', () => { expect(component).toBeTruthy(); }); - it('should render fields', async () => { - await fixture.whenStable(); - + it('should render fields', () => { expect(component.propertyFields).toEqual([mockField]); - const debug = fixture.debugElement.queryAll(By.css('[data-test-field-value]')); expect(debug.length).toEqual(1); // number of fields const element = debug[0].nativeElement; @@ -48,4 +44,14 @@ describe('DocumentViewComponent', () => { expect(debug[0].attributes['id']).toBe('tab-speech'); expect(debug[1].attributes['id']).toBe('tab-scan'); }); + + it('shows named entities if showEntities is true', async () => { + expect(fixture.debugElement.query(By.css('ia-entity-legend'))).toBeFalsy(); + component.showEntities = true; + fixture.detectChanges(); + await fixture.whenStable(); + fixture.detectChanges(); + expect(fixture.debugElement.query(By.css('ia-entity-legend'))).toBeTruthy(); + }); + }); diff --git a/frontend/src/app/document-view/document-view.component.ts b/frontend/src/app/document-view/document-view.component.ts index 81a35aa27..203c3d38b 100644 --- a/frontend/src/app/document-view/document-view.component.ts +++ b/frontend/src/app/document-view/document-view.component.ts @@ -3,7 +3,7 @@ import { Component, Input, OnChanges, SimpleChanges } from '@angular/core'; import { CorpusField, FoundDocument, Corpus, QueryModel } from '../models/index'; import { DocumentView } from '../models/document-page'; import * as _ from 'lodash'; -import { documentIcons } from '../shared/icons'; +import { documentIcons, entityIcons } from '../shared/icons'; @Component({ selector: 'ia-document-view', @@ -24,7 +24,11 @@ export class DocumentViewComponent implements OnChanges { @Input() public view: DocumentView; + @Input() + public showEntities: boolean; + documentIcons = 
documentIcons; + entityIcons = entityIcons; /** active tab on opening */ activeTab: string; @@ -75,62 +79,4 @@ export class DocumentViewComponent implements OnChanges { return field.mappingType === 'geo_point'; } - displayGeoPointField(field: CorpusField) { - let latitude = this.document.fieldValue(field)[field.name][1]; - let longitude = this.document.fieldValue(field)[field.name][0]; - // Round to 2 decimal places - latitude = Math.round(latitude * 100) / 100; - longitude = Math.round(longitude * 100) / 100; - return `Lat: ${latitude}; Lon: ${longitude}`; - } - - /** - * Checks if user has selected fields in the queryModel and whether current field is among them - * Used to check which fields need to be highlighted - */ - selectedFieldsContain(field: CorpusField) { - if (this.queryModel && this.queryModel.searchFields && this.queryModel.searchFields.includes(field)) { - return true; - } else if (this.queryModel && !this.queryModel.searchFields) { - return true; // if there are no selected fields, return true for all fields - } else { - return false; - } - } - - stripTags(htmlString: string){ - const parseHTML= new DOMParser().parseFromString(htmlString, 'text/html'); - return parseHTML.body.textContent || ''; - } - - formatInnerHtml(field: CorpusField) { - const fieldValue = this.document.fieldValues[field.name]; - - if (_.isEmpty(fieldValue)) { - return; - } - - const highlighted = this.highlightedInnerHtml(field); - return this.addParagraphTags(highlighted); - } - - - highlightedInnerHtml(field: CorpusField) { - let highlighted = this.document.fieldValues[field.name]; - if (this.document.highlight && this.document.highlight.hasOwnProperty(field.name) && - this.selectedFieldsContain(field)) { // only apply highlights to selected search fields - for (const highlight of this.document.highlight[field.name]) { - const stripped_highlight = this.stripTags(highlight); - highlighted = highlighted.replace(stripped_highlight, highlight); - } - return highlighted; - } else 
{ - return this.document.fieldValues[field.name]; - } - } - - addParagraphTags(content: string | string[]) { - const paragraphs = typeof content === 'string' ? content.split('\n') : content; - return paragraphs.map(p => `

${p}

`).join(' '); - } } diff --git a/frontend/src/app/document/document-popup/document-popup.component.html b/frontend/src/app/document/document-popup/document-popup.component.html index 6f379e664..705199abb 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.html +++ b/frontend/src/app/document/document-popup/document-popup.component.html @@ -1,9 +1,12 @@ + [responsive]="true" [maximizable]="true" [dismissableMask]="true" [draggable]="true" [resizable]="false" [blockScroll]="true"> + + Document {{document.position}} of {{page.total}} + Show named entities + - +
diff --git a/frontend/src/app/document/document-popup/document-popup.component.spec.ts b/frontend/src/app/document/document-popup/document-popup.component.spec.ts index 74546db68..b497ceecf 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.spec.ts +++ b/frontend/src/app/document/document-popup/document-popup.component.spec.ts @@ -1,7 +1,14 @@ -import { ComponentFixture, TestBed, waitForAsync } from '@angular/core/testing'; +import { ComponentFixture, TestBed, fakeAsync, tick, waitForAsync } from '@angular/core/testing'; +import { By } from '@angular/platform-browser'; import { DocumentPopupComponent } from './document-popup.component'; import { commonTestBed } from '../../common-test-bed'; +import { makeDocument } from '../../../mock-data/constructor-helpers'; +import { mockCorpus, mockCorpus2, mockField } from '../../../mock-data/corpus'; +import { DocumentPage } from '../../models/document-page'; +import { QueryModel } from '../../models'; +import { query } from '@angular/animations'; + describe('DocumentPopupComponent', () => { let component: DocumentPopupComponent; @@ -14,10 +21,27 @@ describe('DocumentPopupComponent', () => { beforeEach(() => { fixture = TestBed.createComponent(DocumentPopupComponent); component = fixture.componentInstance; + const document = makeDocument({ great_field: 'Hello world!' 
}); + component.document = document; + component.page = new DocumentPage([document], 1, [mockField]); + component.queryModel = new QueryModel(mockCorpus); fixture.detectChanges(); }); it('should create', () => { expect(component).toBeTruthy(); }); + + it('does not show the NER toggle for corpora without named entities', () => { + expect(fixture.debugElement.query(By.css('ia-toggle'))).toBeFalsy(); + }); + + it('shows the NER toggle for corpora with named entities', () => { + const setModel = component.queryModel; + const queryModel = new QueryModel(mockCorpus2); + component.queryModel = queryModel; + component.ngOnChanges({queryModel: {previousValue: setModel, currentValue: queryModel, firstChange: false, isFirstChange: null}}); + fixture.detectChanges(); + expect(fixture.debugElement.query(By.css('ia-toggle'))).toBeTruthy(); + }); }); diff --git a/frontend/src/app/document/document-popup/document-popup.component.ts b/frontend/src/app/document/document-popup/document-popup.component.ts index 98109ffa5..b41997c14 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.ts +++ b/frontend/src/app/document/document-popup/document-popup.component.ts @@ -4,7 +4,7 @@ import { takeUntil } from 'rxjs/operators'; import * as _ from 'lodash'; import { FoundDocument, QueryModel } from '../../models'; import { Subject } from 'rxjs'; -import { documentIcons, actionIcons } from '../../shared/icons'; +import { documentIcons, actionIcons, corpusIcons } from '../../shared/icons'; @Component({ selector: 'ia-document-popup', @@ -23,6 +23,9 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { actionIcons = actionIcons; documentIcons = documentIcons; + showNamedEntities = false; + showNEROption = false; + private refresh$ = new Subject(); get documentPageLink(): string[] { @@ -38,6 +41,9 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { } ngOnChanges(changes: SimpleChanges): void { + if (changes.queryModel) { + 
this.showNEROption = this.queryModel.corpus.hasNamedEntities; + } if (changes.page) { this.refresh$.next(); this.focusUpdate(); @@ -63,4 +69,8 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { this.visible = false; } } + + toggleNER(active: boolean): void { + this.showNamedEntities = active; + } } diff --git a/frontend/src/app/document/document-preview/document-preview.component.html b/frontend/src/app/document/document-preview/document-preview.component.html index 87429833f..248e7a632 100644 --- a/frontend/src/app/document/document-preview/document-preview.component.html +++ b/frontend/src/app/document/document-preview/document-preview.component.html @@ -15,7 +15,7 @@ + [innerHtml]="document.fieldValue(field) | snippet"> diff --git a/frontend/src/app/document/document.module.ts b/frontend/src/app/document/document.module.ts index 8674a5677..67822bcc8 100644 --- a/frontend/src/app/document/document.module.ts +++ b/frontend/src/app/document/document.module.ts @@ -9,8 +9,8 @@ import { TagModule } from '../tag/tag.module'; import { DocumentPopupComponent } from './document-popup/document-popup.component'; import { DialogModule } from 'primeng/dialog'; import { DocumentPreviewComponent } from './document-preview/document-preview.component'; - - +import { EntityLegendComponent } from './entity-legend/entity-legend.component'; +import { ElasticsearchHighlightPipe, GeoDataPipe, ParagraphPipe, SnippetPipe } from '../shared/pipes'; @NgModule({ declarations: [ @@ -19,6 +19,11 @@ import { DocumentPreviewComponent } from './document-preview/document-preview.co SearchRelevanceComponent, DocumentPopupComponent, DocumentPreviewComponent, + EntityLegendComponent, + ElasticsearchHighlightPipe, + GeoDataPipe, + ParagraphPipe, + SnippetPipe ], imports: [ DialogModule, @@ -31,6 +36,7 @@ import { DocumentPreviewComponent } from './document-preview/document-preview.co DocumentViewComponent, DocumentPageComponent, DocumentPopupComponent, + EntityLegendComponent, 
SearchRelevanceComponent, ] }) diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.html b/frontend/src/app/document/entity-legend/entity-legend.component.html new file mode 100644 index 000000000..41f9622f6 --- /dev/null +++ b/frontend/src/app/document/entity-legend/entity-legend.component.html @@ -0,0 +1,13 @@ +
+
    +
  • + + + {{entity.slice(0,1).toUpperCase() + entity.slice(1)}} + +
  • +
+ + No named entities were found in this text. + +
diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.scss b/frontend/src/app/document/entity-legend/entity-legend.component.scss new file mode 100644 index 000000000..459455661 --- /dev/null +++ b/frontend/src/app/document/entity-legend/entity-legend.component.scss @@ -0,0 +1,8 @@ +ul { + list-style-type: none; + margin-left: 0; +} + +.no-entities-message { + opacity: 0.6; +} diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.spec.ts b/frontend/src/app/document/entity-legend/entity-legend.component.spec.ts new file mode 100644 index 000000000..69c35776e --- /dev/null +++ b/frontend/src/app/document/entity-legend/entity-legend.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { EntityLegendComponent } from './entity-legend.component'; + +describe('EntitiesComponent', () => { + let component: EntityLegendComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [ EntityLegendComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(EntityLegendComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.ts b/frontend/src/app/document/entity-legend/entity-legend.component.ts new file mode 100644 index 000000000..882fcd59c --- /dev/null +++ b/frontend/src/app/document/entity-legend/entity-legend.component.ts @@ -0,0 +1,28 @@ +import { Component, Input, OnChanges } from '@angular/core'; +import * as _ from 'lodash'; + +import { entityIcons } from '../../shared/icons'; +import { FieldEntities } from '../../models'; + +@Component({ + selector: 'ia-entity-legend', + templateUrl: './entity-legend.component.html', + styleUrls: ['./entity-legend.component.scss'] +}) 
+export class EntityLegendComponent implements OnChanges { + @Input() entityAnnotations: FieldEntities[]; + + public entityIcons = entityIcons; + public entities: string[]; + + constructor() { } + + ngOnChanges(): void { + if (!this.entityAnnotations) { + this.entities = null; + } else { + this.entities = _.uniq(this.entityAnnotations.map((item) => item.entity)).filter((value) => value !=='flat'); + } + } + +} diff --git a/frontend/src/app/manual/manual.module.ts b/frontend/src/app/manual/manual.module.ts index ac0d5b3fe..61404fcbd 100644 --- a/frontend/src/app/manual/manual.module.ts +++ b/frontend/src/app/manual/manual.module.ts @@ -4,7 +4,7 @@ import { ManualNavigationComponent } from './manual-navigation.component'; import { ManualComponent } from './manual.component'; import { AboutComponent } from '../about/about.component'; import { PrivacyComponent } from '../privacy/privacy.component'; - +import { RegexHighlightPipe } from '../shared/pipes'; @NgModule({ @@ -13,6 +13,7 @@ import { PrivacyComponent } from '../privacy/privacy.component'; ManualComponent, ManualNavigationComponent, PrivacyComponent, + RegexHighlightPipe ], imports: [ SharedModule diff --git a/frontend/src/app/models/corpus.ts b/frontend/src/app/models/corpus.ts index 4906e2966..4e58d42fe 100644 --- a/frontend/src/app/models/corpus.ts +++ b/frontend/src/app/models/corpus.ts @@ -29,6 +29,7 @@ export class Corpus { public wordModelsPresent: boolean, public languages: string[], public category: string, + public hasNamedEntities: boolean, public documentContext?: DocumentContext, public newHighlight?: boolean, public defaultSort?: SortState, diff --git a/frontend/src/app/models/found-document.spec.ts b/frontend/src/app/models/found-document.spec.ts index fa8452f6c..b286fa8ec 100644 --- a/frontend/src/app/models/found-document.spec.ts +++ b/frontend/src/app/models/found-document.spec.ts @@ -1,14 +1,17 @@ -import { TestBed, waitForAsync } from '@angular/core/testing'; +import { TestBed, fakeAsync, 
waitForAsync } from '@angular/core/testing'; +import * as _ from 'lodash'; +import { reduce, take } from 'rxjs/operators'; +import { Observable } from 'rxjs'; import { makeDocument } from '../../mock-data/constructor-helpers'; import { mockCorpus, mockCorpus3 } from '../../mock-data/corpus'; +import { EntityServiceMock } from '../../mock-data/entity'; +import { TagServiceMock, mockTags } from '../../mock-data/tag'; import { FoundDocument } from './found-document'; +import { EntityService } from '../services/entity.service'; import { TagService } from '../services/tag.service'; -import { TagServiceMock, mockTags } from '../../mock-data/tag'; -import * as _ from 'lodash'; -import { reduce, take } from 'rxjs/operators'; -import { Observable } from 'rxjs'; import { Tag } from './tag'; + const maxScore = 2.9113607; const mockResponse = { _index: 'troonredes', @@ -33,19 +36,20 @@ const mockResponse = { }; describe('FoundDocument', () => { - let tagService: TagService; + const mockTagService = new TagServiceMock() as any; + const mockEntityService = new EntityServiceMock() as any; beforeEach(() => { TestBed.configureTestingModule({ providers: [ - { provide: TagService, useValue: new TagServiceMock() } + { provide: TagService, useClass: TagServiceMock }, + { provide: EntityService, useClass: EntityServiceMock } ] }); - tagService = TestBed.inject(TagService); }); it('should construct from an elasticsearch response', () => { - const document = new FoundDocument(tagService, mockCorpus, mockResponse, maxScore); + const document = new FoundDocument(mockTagService, mockEntityService, mockCorpus, mockResponse, maxScore); expect(document.id).toBe('1994_troonrede'); expect(document.fieldValues['monarch']).toBe('Beatrix'); @@ -90,4 +94,5 @@ describe('FoundDocument', () => { ]); }); })); + }); diff --git a/frontend/src/app/models/found-document.ts b/frontend/src/app/models/found-document.ts index 96b40cead..6fcbcb134 100644 --- a/frontend/src/app/models/found-document.ts +++ 
b/frontend/src/app/models/found-document.ts @@ -1,11 +1,14 @@ import * as _ from 'lodash'; +import { map, mergeMap, shareReplay, take } from 'rxjs/operators'; + import { makeContextParams } from '../utils/document-context'; import { Corpus, CorpusField } from './corpus'; import { FieldValues, HighlightResult, SearchHit } from './elasticsearch'; import { Tag } from './tag'; import { Observable, Subject, merge, timer } from 'rxjs'; +import { EntityService } from '../services/entity.service'; import { TagService } from '../services/tag.service'; -import { map, mergeMap, shareReplay, take, tap } from 'rxjs/operators'; +import { FieldEntities } from './search-results'; export class FoundDocument { id: string; @@ -27,13 +30,16 @@ export class FoundDocument { /** tags created on the document */ tags$: Observable; + /** named entities associated with the document */ + entityAnnotations$: Observable<{[fieldName: string]: FieldEntities[]}>; private tagsChanged$ = new Subject(); constructor( private tagService: TagService, + private entityService: EntityService, public corpus: Corpus, hit: SearchHit, - maxScore: number = 1 + maxScore: number = 1, ) { this.id = hit._id; this.relevance = hit._score / maxScore; @@ -42,12 +48,19 @@ export class FoundDocument { const created$ = timer(0); // observable of the moment of construction (i.e. 
now) - // tags need to refreshed when the document is created, and + // tags need to be refreshed when the document is created, and // after each update + // shareReplay shares the value over all observers: + // add/removeTag, async pipe in document-tags.component template this.tags$ = merge(created$, this.tagsChanged$).pipe( mergeMap(() => this.fetchTags()), shareReplay(1), ); + + this.entityAnnotations$ = created$.pipe( + mergeMap(() => this.fetchAnnotatedEntities()), + shareReplay(1), + ); } /** @@ -112,6 +125,10 @@ export class FoundDocument { ); } + private fetchAnnotatedEntities(): Observable<{[fieldName: string]: FieldEntities[]}> { + return this.entityService.getDocumentEntities(this.corpus, this.id); + } + private setTags(tags: Tag[]): Observable { return this.tagService.setDocumentTags(this, tags); } @@ -119,4 +136,5 @@ export class FoundDocument { private fetchTags(): Observable { return this.tagService.getDocumentTags(this); } + } diff --git a/frontend/src/app/models/search-results.ts b/frontend/src/app/models/search-results.ts index ad1593a51..1eac5b1ff 100644 --- a/frontend/src/app/models/search-results.ts +++ b/frontend/src/app/models/search-results.ts @@ -83,6 +83,14 @@ export interface QueryFeedback { similarTerms?: string[]; } +export interface FieldEntities { + [entityType: string] : string +} + +export interface NamedEntitiesResult { + [fieldName: string]: FieldEntities[] +} + export interface TaskResult { task_ids: string[] }; export interface TaskSuccess { diff --git a/frontend/src/app/search/index.ts b/frontend/src/app/search/index.ts index 11e8bc3bf..40905695a 100644 --- a/frontend/src/app/search/index.ts +++ b/frontend/src/app/search/index.ts @@ -1,4 +1,3 @@ -export * from './highlight.pipe'; export * from './search.component'; export * from './search-relevance.component'; export * from './search-results.component'; diff --git a/frontend/src/app/services/corpus.service.ts b/frontend/src/app/services/corpus.service.ts index 
7092bd591..6c4e7ff6d 100644 --- a/frontend/src/app/services/corpus.service.ts +++ b/frontend/src/app/services/corpus.service.ts @@ -89,6 +89,7 @@ export class CorpusService { data.word_models_present, data.languages, data.category, + data.has_named_entities, this.parseDocumentContext(data.document_context, allFields), data.new_highlight, this.parseDefaultSort(data.default_sort, allFields), diff --git a/frontend/src/app/services/download.service.spec.ts b/frontend/src/app/services/download.service.spec.ts index add89bc77..308c65640 100644 --- a/frontend/src/app/services/download.service.spec.ts +++ b/frontend/src/app/services/download.service.spec.ts @@ -3,8 +3,6 @@ import { TestBed, inject } from '@angular/core/testing'; import { ApiService } from './api.service'; import { ApiServiceMock } from '../../mock-data/api'; import { DownloadService } from './download.service'; -import { ElasticSearchService } from './elastic-search.service'; -import { ElasticSearchServiceMock } from '../../mock-data/elastic-search'; import { mockCorpus, mockField } from '../../mock-data/corpus'; import { DownloadOptions, LimitedResultsDownloadParameters, QueryModel, SortState } from '../models'; diff --git a/frontend/src/app/services/elastic-search.service.spec.ts b/frontend/src/app/services/elastic-search.service.spec.ts index 53841cc52..e1ace87e8 100644 --- a/frontend/src/app/services/elastic-search.service.spec.ts +++ b/frontend/src/app/services/elastic-search.service.spec.ts @@ -3,9 +3,12 @@ import { HttpClientTestingModule, HttpTestingController } from '@angular/common/ import { ElasticSearchService, SearchResponse } from './elastic-search.service'; import { QueryModel } from '../models'; import { mockCorpus, mockField, mockField2 } from '../../mock-data/corpus'; -import { TagService } from './tag.service'; +import { EntityService } from './entity.service'; +import { EntityServiceMock } from '../../mock-data/entity'; import { TagServiceMock } from '../../mock-data/tag'; -import { 
Aggregator, TermsAggregator } from '../models/aggregation'; +import { TagService } from './tag.service'; +import { TermsAggregator } from '../models/aggregation'; + const mockResponse: SearchResponse = { took: 4, @@ -65,6 +68,7 @@ describe('ElasticSearchService', () => { TestBed.configureTestingModule({ providers: [ ElasticSearchService, + { provide: EntityService, useValue: new EntityServiceMock()}, { provide: TagService, useValue: new TagServiceMock() } ], imports: [ HttpClientTestingModule ] diff --git a/frontend/src/app/services/elastic-search.service.ts b/frontend/src/app/services/elastic-search.service.ts index 8c4df1822..defd2d261 100644 --- a/frontend/src/app/services/elastic-search.service.ts +++ b/frontend/src/app/services/elastic-search.service.ts @@ -6,18 +6,19 @@ import { FoundDocument, Corpus, QueryModel, SearchResults, SearchHit } from '../models/index'; +import { Aggregator } from '../models/aggregation'; import * as _ from 'lodash'; import { TagService } from './tag.service'; import { APIQuery } from '../models/search-requests'; import { PageResultsParameters } from '../models/page-results'; import { resultsParamsToAPIQuery } from '../utils/es-query'; -import { Aggregator } from '../models/aggregation'; +import { EntityService } from './entity.service'; @Injectable() export class ElasticSearchService { - constructor(private http: HttpClient, private tagService: TagService) { + constructor(private http: HttpClient, private entityService: EntityService, private tagService: TagService) { } getDocumentById(id: string, corpus: Corpus): Promise { @@ -64,6 +65,8 @@ export class ElasticSearchService { return this.parseResponse(queryModel.corpus, response); } + + /** * Execute an ElasticSearch query and return a dictionary containing the results. 
*/ @@ -96,7 +99,7 @@ export class ElasticSearchService { * return the id, relevance and field values of a given document */ private hitToDocument(corpus: Corpus, hit: SearchHit, maxScore: number): FoundDocument { - return new FoundDocument(this.tagService, corpus, hit, maxScore); + return new FoundDocument(this.tagService, this.entityService, corpus, hit, maxScore); } } diff --git a/frontend/src/app/services/entity.service.ts b/frontend/src/app/services/entity.service.ts new file mode 100644 index 000000000..056e1e543 --- /dev/null +++ b/frontend/src/app/services/entity.service.ts @@ -0,0 +1,19 @@ +import { HttpClient } from '@angular/common/http'; +import { Injectable } from '@angular/core'; +import { Observable } from 'rxjs'; + +import { Corpus, NamedEntitiesResult } from '../models'; + +@Injectable({ + providedIn: 'root', +}) +export class EntityService { + + constructor(private http: HttpClient) { + } + + public getDocumentEntities(corpus: Corpus, id: string): Observable { + const url = `/api/es/${corpus.name}/${id}/named_entities`; + return this.http.get(url); + } +} diff --git a/frontend/src/app/services/highlight.service.spec.ts b/frontend/src/app/services/highlight.service.spec.ts index 2eb8aa049..0f50f0cfb 100644 --- a/frontend/src/app/services/highlight.service.spec.ts +++ b/frontend/src/app/services/highlight.service.spec.ts @@ -90,43 +90,6 @@ describe('HighlightService', () => { [13, 'في']]); }); - it('Should limit the length of hits using snippets', () => { - const text = generateSequence(0, 10000); - const remainingLength = (maxSnippetsLength - 4) * 0.5; - const leftLength = Math.ceil(remainingLength); - const rightLength = Math.floor(remainingLength); - const sequenceSnippetsLength = Math.ceil(leftLength / 5); - - const highlights = highlightService.highlight(text, '5000'); - const snippets = highlightService.snippets(highlights); - - const result = getHighlightedString(snippets); - const expected = getHighlightedString([ - { - substring: 
omissionString + generateSequence(5000 - sequenceSnippetsLength, 5000).slice(-leftLength + 1) + ' ', - isHit: false - }, - { - substring: '5000', - isHit: true - }, - { - substring: ' ' + generateSequence(5001, 5001 + sequenceSnippetsLength).substr(0, rightLength - 1) + omissionString, - isHit: false - }]); - - expect(result).toEqual(expected); - }); - - it('Should pass short snippets', () => { - const highlights = highlightService.highlight('hello world!', ''); - const snippets = highlightService.snippets(highlights); - expect(snippets).toEqual([{ - isHit: false, - substring: 'hello world!' - }]); - }); - it('Should highlight multiline text', () => { expectHighlights( // eslint-disable-next-line max-len diff --git a/frontend/src/app/services/highlight.service.ts b/frontend/src/app/services/highlight.service.ts index 4fdc2a4ea..e4bc31941 100644 --- a/frontend/src/app/services/highlight.service.ts +++ b/frontend/src/app/services/highlight.service.ts @@ -5,10 +5,7 @@ import { Injectable } from '@angular/core'; * a more scalable approach would need to be implemented if rendering many hits is required. */ const maxHits = 100; -/** - * The maximum number of snippets. - */ -const maxSnippetsCount = 7; + /** * The maximum character length of all the text snippets combined. */ @@ -52,7 +49,6 @@ export class HighlightService { } let result: RegExpExecArray; - const parsedText: TextPart[] = []; let lastIndex = 0; for ( @@ -78,39 +74,6 @@ export class HighlightService { } } - /** - * Gets short snippets from the text part to give the user a short overview of the text content. 
- */ - public snippets(parts: IterableIterator): TextPart[] { - const snippets: TextPart[] = []; - for ( - let i = 0, next = parts.next(); - !next.done && i < maxSnippetsCount; - i++, next = parts.next() - ) { - snippets.push(next.value); - } - - const lengths = this.getSnippetLengths( - snippets.map((snippet) => snippet.substring.length), - maxSnippetsLength - ); - - snippets.forEach((part, index) => { - part.substring = this.cropSnippetText( - part.substring, - lengths[index], - index === snippets.length - 1 - ? 'left' - : index === 0 - ? 'right' - : 'middle' - ); - }); - - return snippets; - } - /** * Convert the query to a regular expression matching any hit in a string. * @@ -151,79 +114,6 @@ export class HighlightService { ); } - private getSnippetLengths( - actualLengths: number[], - maxTotalLength: number, - croppedSnippets = actualLengths.length - ): number[] { - const targetLengths: number[] = []; - let remainingCharacters = maxTotalLength; - const maxLength = Math.max( - 1, - Math.floor(maxTotalLength / croppedSnippets) - ); - - let remainingSnippets = 0; - - let i = 0; - for (; i < actualLengths.length && remainingCharacters > 0; i++) { - const actualLength = actualLengths[i]; - const targetLength = Math.min(actualLength, maxLength); - - remainingCharacters -= targetLength; - targetLengths[i] = targetLength; - - if (actualLength > targetLength) { - // only the cropped snippets could become longer - remainingSnippets++; - } - } - for (; i < actualLengths.length; i++) { - targetLengths[i] = 0; - } - - if (remainingCharacters && remainingSnippets) { - // if a snippet is shorter than the maximum snippet length, allow the remaining snippets to become longer - const additionalLengths = this.getSnippetLengths( - actualLengths.map( - (length, index) => length - targetLengths[index] - ), - remainingCharacters, - remainingSnippets - ); - return targetLengths.map( - (length, index) => length + additionalLengths[index] - ); - } - - return targetLengths; - } - - 
private cropSnippetText( - text: string, - maxLength: number, - location: 'left' | 'middle' | 'right' - ): string { - if (text.length <= maxLength) { - return text; - } - - switch (location) { - case 'left': - return text.substr(0, maxLength) + omissionString; - - case 'middle': - return ( - text.substr(0, maxLength / 2) + - omissionString + - text.substr(text.length - maxLength / 2) - ); - - case 'right': - return omissionString + text.slice(-maxLength); - } - } - /** * Get the word patterns match in a query. * diff --git a/frontend/src/app/services/search.service.spec.ts b/frontend/src/app/services/search.service.spec.ts index 523c16112..1facbb5f3 100644 --- a/frontend/src/app/services/search.service.spec.ts +++ b/frontend/src/app/services/search.service.spec.ts @@ -9,8 +9,6 @@ import { ElasticSearchServiceMock } from '../../mock-data/elastic-search'; import { QueryService } from './query.service'; import { SearchService } from './search.service'; import { SessionService } from './session.service'; -import { WordmodelsService } from './wordmodels.service'; -import { WordmodelsServiceMock } from '../../mock-data/wordmodels'; import { HttpClientTestingModule } from '@angular/common/http/testing'; import { QueryModel } from '../models'; import { mockCorpus } from '../../mock-data/corpus'; diff --git a/frontend/src/app/shared/icons.ts b/frontend/src/app/shared/icons.ts index daefdf6bf..21b16626d 100644 --- a/frontend/src/app/shared/icons.ts +++ b/frontend/src/app/shared/icons.ts @@ -4,9 +4,9 @@ import { } from '@fortawesome/free-regular-svg-icons'; import { IconDefinition as SolidIconDefinition, - faAngleDown, faAngleUp, faArrowLeft, faArrowRight, faAt, faBook, faBookOpen, faChartColumn, - faCheck, faChevronDown, faChevronLeft, faChevronRight, faCog, faCogs, faDatabase, faDiagramProject, - faDownload, faEdit, faEnvelope, faEye, faFilter, faHistory, faImage, faInfo, faInfoCircle, faLink, faList, faLock, + faAngleDown, faAngleUp, faArrowLeft, faArrowRight, faAt, 
faBook, faBookmark, faBookOpen, faBuilding, faChartColumn, + faCheck, faChevronLeft, faChevronRight, faCog, faCogs, faDatabase, faDiagramProject, + faDownload, faEnvelope, faEye, faFilter, faHistory, faImage, faInfo, faInfoCircle, faLink, faList, faLocationDot, faLock, faMinus, faPalette, faPencil, faPlus, faQuestionCircle, faSearch, faSearchMinus, faSearchPlus, faSignOut, faSortAlphaAsc, faSortAlphaDesc, faSortNumericAsc, faSortNumericDesc, faSquare, faTable, faTags, faTimes, faTrashCan, faUndo, faUpload, faUser @@ -109,3 +109,10 @@ export const documentIcons: Icons = { scanAlt: faNewspaper, context: faBookOpen, }; + +export const entityIcons: Icons = { + person: faUser, + location: faLocationDot, + organization: faBuilding, + miscellaneous: faBookmark, +} diff --git a/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts b/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts new file mode 100644 index 000000000..ae1d374da --- /dev/null +++ b/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts @@ -0,0 +1,43 @@ +import { Pipe, PipeTransform } from '@angular/core'; +import * as _ from 'lodash'; + +import { CorpusField, FoundDocument } from '../../models'; + +@Pipe({ + name: 'elasticsearchHighlight' +}) +export class ElasticsearchHighlightPipe implements PipeTransform { + + /** + * Transforms a text to display highlights fetched from Elasticsearch + * + * @param document a FoundDocument, containing the fetched highlights + */ + transform(field: CorpusField, document: FoundDocument) { + const fieldValue = document.fieldValues[field.name]; + + if (_.isEmpty(fieldValue)) { + return; + } + + const highlighted = this.highlightedInnerHtml(field, document); + return highlighted; + } + + highlightedInnerHtml(field: CorpusField, document: FoundDocument) { + let highlighted = document.fieldValues[field.name]; + if (document.highlight && document.highlight.hasOwnProperty(field.name)) { + for (const highlight of document.highlight[field.name]) { + 
const strippedHighlight = this.stripTags(highlight); + highlighted = highlighted.replace(strippedHighlight, highlight); + } + } + return highlighted; + } + + stripTags(htmlString: string){ + const parseHTML= new DOMParser().parseFromString(htmlString, 'text/html'); + return parseHTML.body.textContent || ''; + } + +} diff --git a/frontend/src/app/shared/pipes/geo-data.pipe.ts b/frontend/src/app/shared/pipes/geo-data.pipe.ts new file mode 100644 index 000000000..08d9df047 --- /dev/null +++ b/frontend/src/app/shared/pipes/geo-data.pipe.ts @@ -0,0 +1,25 @@ +import { Pipe, PipeTransform } from '@angular/core'; +import { DomSanitizer } from '@angular/platform-browser'; +import { CorpusField, FoundDocument } from '../../models'; +@Pipe({ + name: 'geoData' +}) +export class GeoDataPipe implements PipeTransform { + constructor(private sanitizer: DomSanitizer) { + } + + /** + * Transforms GeoJSON data + * + * @param document FoundDocument holding the actual data + */ + transform(field: CorpusField, document: FoundDocument) { + let latitude = document.fieldValue(field)[field.name][1]; + let longitude = document.fieldValue(field)[field.name][0]; + // Round to 2 decimal places + latitude = Math.round(latitude * 100) / 100; + longitude = Math.round(longitude * 100) / 100; + return `Lat: ${latitude}; Lon: ${longitude}`; + } + +} diff --git a/frontend/src/app/shared/pipes/index.ts b/frontend/src/app/shared/pipes/index.ts new file mode 100644 index 000000000..0bf4c0f5b --- /dev/null +++ b/frontend/src/app/shared/pipes/index.ts @@ -0,0 +1,5 @@ +export * from './elasticsearch-highlight.pipe'; +export * from './geo-data.pipe'; +export * from './paragraph.pipe'; +export * from './regex-highlight.pipe'; +export * from './snippet.pipe'; diff --git a/frontend/src/app/shared/pipes/paragraph.pipe.spec.ts b/frontend/src/app/shared/pipes/paragraph.pipe.spec.ts new file mode 100644 index 000000000..10f3ec4c0 --- /dev/null +++ b/frontend/src/app/shared/pipes/paragraph.pipe.spec.ts @@ -0,0 +1,8 
@@ +import { ParagraphPipe } from './paragraph.pipe'; + +describe('ParagraphPipe', () => { + it('create an instance', () => { + const pipe = new ParagraphPipe(); + expect(pipe).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/shared/pipes/paragraph.pipe.ts b/frontend/src/app/shared/pipes/paragraph.pipe.ts new file mode 100644 index 000000000..eebafc51a --- /dev/null +++ b/frontend/src/app/shared/pipes/paragraph.pipe.ts @@ -0,0 +1,27 @@ +import { Pipe, PipeTransform } from '@angular/core'; + + +/** + * Wraps the paragraphs of a text in p elements. + */ +@Pipe({ + name: 'paragraph', +}) +export class ParagraphPipe implements PipeTransform { + + transform(content: string | string[]): unknown { + const splitText = this.addParagraphTags(content); + return splitText; + } + + /** a string is split on newlines, a list is used as is; content with a single paragraph (or no content) is returned unchanged */ + addParagraphTags(content: string | string[]) { + const paragraphs = typeof content === 'string' ? content.split('\n') : content; + if (!paragraphs || paragraphs.length === 1) { + return content; + } + return paragraphs.map(p => `<p>${p}</p>`).join(' '); + } + + +} diff --git a/frontend/src/app/search/highlight.pipe.ts b/frontend/src/app/shared/pipes/regex-highlight.pipe.ts similarity index 67% rename from frontend/src/app/search/highlight.pipe.ts rename to frontend/src/app/shared/pipes/regex-highlight.pipe.ts index 3ee84d437..595499503 100644 --- a/frontend/src/app/search/highlight.pipe.ts +++ b/frontend/src/app/shared/pipes/regex-highlight.pipe.ts @@ -1,24 +1,22 @@ import { Pipe, PipeTransform, SecurityContext } from '@angular/core'; import { DomSanitizer } from '@angular/platform-browser'; -import { HighlightService } from '../services/highlight.service'; +import { HighlightService } from '../../services/highlight.service'; @Pipe({ - name: 'highlight' + name: 'regexHighlight' }) -export class HighlightPipe implements PipeTransform { +export class RegexHighlightPipe implements PipeTransform { constructor(private sanitizer: DomSanitizer, private highlightService: HighlightService) { } /** * Transforms a text to highlight all the text matching the specified query. - * - * @param snippets Only show snippets. When this enabled, line breaks will also be replaced with -- */ - transform(text: string, query: string, snippets: boolean = false) { + transform(text: string, query: string) { const highlights = this.highlightService.highlight(text, query); - const parts = snippets ? this.highlightService.snippets(highlights) : Array.from(highlights); + const parts = Array.from(highlights); const highlightedText = parts.map(part => { - const sanitizedText = this.sanitizedLineBreaks(part.substring, snippets ? ' — ' : '
'); + const sanitizedText = this.sanitizedLineBreaks(part.substring, '
'); return part.isHit ? `${sanitizedText}` : sanitizedText; }).join(''); diff --git a/frontend/src/app/shared/pipes/snippet.pipe.ts b/frontend/src/app/shared/pipes/snippet.pipe.ts new file mode 100644 index 000000000..4634395cb --- /dev/null +++ b/frontend/src/app/shared/pipes/snippet.pipe.ts @@ -0,0 +1,23 @@ +import { Pipe, PipeTransform } from '@angular/core'; +import { DomSanitizer } from '@angular/platform-browser'; +@Pipe({ + name: 'snippet' +}) +export class SnippetPipe implements PipeTransform { + constructor(private sanitizer: DomSanitizer) { + } + + /** + * Transforms a text to only show its leading characters, followed by an ellipsis if the text was shortened + * + * @param text The text to shorten; a missing value is treated as an empty string + * @param nCharacters Specifies how many leading characters should be displayed + */ + transform(text: string, nCharacters=100) { + // NOTE(review): bypassSecurityTrustHtml switches off Angular's sanitization for this value, confirm that callers only pass trusted text + const fullText = text || ''; + const snippedText = fullText.length > nCharacters ? fullText.slice(0, nCharacters).concat('...') : fullText; + return this.sanitizer.bypassSecurityTrustHtml(snippedText); + } + +} diff --git a/frontend/src/app/shared/shared.module.ts b/frontend/src/app/shared/shared.module.ts index 3353638af..ee400898a 100644 --- a/frontend/src/app/shared/shared.module.ts +++ b/frontend/src/app/shared/shared.module.ts @@ -12,10 +12,10 @@ import { BalloonDirective } from '../balloon.directive'; import { DatePickerComponent } from '../corpus-selection/corpus-filter/date-picker/date-picker.component'; import { ErrorComponent } from '../error/error.component'; import { ScrollToDirective } from '../scroll-to.directive'; -import { HighlightPipe } from '../search'; import { DropdownModule } from './dropdown/dropdown.module'; import { TabPanelDirective } from './tabs/tab-panel.directive'; import { TabsComponent } from './tabs/tabs.component'; +import { ToggleComponent } from './toggle/toggle.component'; import { SlugifyPipe } from './pipes/slugify.pipe'; @NgModule({ @@ -23,10 +23,10 @@ import { SlugifyPipe } from './pipes/slugify.pipe'; DatePickerComponent, ErrorComponent, BalloonDirective, - HighlightPipe, ScrollToDirective, TabsComponent, TabPanelDirective, + 
ToggleComponent, SlugifyPipe, ], exports: [ @@ -45,11 +45,11 @@ import { SlugifyPipe } from './pipes/slugify.pipe'; FormsModule, FontAwesomeModule, BalloonDirective, - HighlightPipe, HttpClientModule, HttpClientXsrfModule, RouterModule, TableModule, + ToggleComponent, // Shared pipes SlugifyPipe, diff --git a/frontend/src/app/shared/toggle/toggle.component.html b/frontend/src/app/shared/toggle/toggle.component.html new file mode 100644 index 000000000..8b855a255 --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.html @@ -0,0 +1,4 @@ +
+ + +
diff --git a/frontend/src/app/shared/toggle/toggle.component.scss b/frontend/src/app/shared/toggle/toggle.component.scss new file mode 100644 index 000000000..7ca30a115 --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.scss @@ -0,0 +1,68 @@ +@import "../../../_utilities"; + +/* The switch - the box around the slider */ +.toggle-container { + position: absolute; + margin-left: .5rem; + margin-top: -.2rem; + display: inline-block; + width: 4rem; + height: 2rem; + pointer-events: none; + + /* Hide default HTML checkbox */ + input { + opacity: 0; + width: 100%; + height: 100%; + pointer-events: all; + } +} + + +/* The slider */ +.slider { + position: absolute; + cursor: pointer; + top: 0; + left: 0; + right: 0; + bottom: 0; + background-color: $highlight-color; + transition: .4s; + border-radius: 35px; + pointer-events: none; + &:before { + content: ""; + z-index: 20; + position: absolute; + height: 1.6rem; + width: 1.6rem; + left: .2rem; + bottom: .2rem; + background-color: white; + transition: .4s; + border-radius: 50%; + } +} + + + +input:checked { + + .slider { + background-color: $primary; + } + + + .slider:before { + transform: translateX(2rem); + -webkit-transform: translateX(2rem); + -moz-transform: translateX(2rem); + -ms-transform: translateX(2rem); + -o-transform: translateX(2rem); + } + + + .slider:after { + left: calc(100% - 5px); + transform: translateX(-100%); + } +} \ No newline at end of file diff --git a/frontend/src/app/shared/toggle/toggle.component.spec.ts b/frontend/src/app/shared/toggle/toggle.component.spec.ts new file mode 100644 index 000000000..301ab008b --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { ToggleComponent } from './toggle.component'; + +describe('ToggleComponent', () => { + let component: ToggleComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await 
TestBed.configureTestingModule({ + declarations: [ ToggleComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(ToggleComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/shared/toggle/toggle.component.ts b/frontend/src/app/shared/toggle/toggle.component.ts new file mode 100644 index 000000000..13194c3c8 --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.ts @@ -0,0 +1,22 @@ +import { Component, EventEmitter, OnInit, Output } from '@angular/core'; + +@Component({ + selector: 'ia-toggle', + templateUrl: './toggle.component.html', + styleUrls: ['./toggle.component.scss'] +}) +export class ToggleComponent implements OnInit { + @Output() toggled = new EventEmitter(); + active = false; + + constructor() { } + + ngOnInit(): void { + } + + public toggleButton() { + this.active = !this.active; + this.toggled.emit(this.active); + } + +} diff --git a/frontend/src/app/word-models/word-models.component.html b/frontend/src/app/word-models/word-models.component.html index f0ec4c6b0..ca5255730 100644 --- a/frontend/src/app/word-models/word-models.component.html +++ b/frontend/src/app/word-models/word-models.component.html @@ -1,6 +1,6 @@ -
+
diff --git a/frontend/src/app/word-models/word-models.component.ts b/frontend/src/app/word-models/word-models.component.ts index f1353b265..566d9700d 100644 --- a/frontend/src/app/word-models/word-models.component.ts +++ b/frontend/src/app/word-models/word-models.component.ts @@ -18,7 +18,6 @@ export class WordModelsComponent extends ParamDirective { public searchSection: ElementRef; public isScrolledDown: boolean; - user: User; corpus: Corpus; queryText: string; @@ -79,7 +78,6 @@ export class WordModelsComponent extends ParamDirective { } async initialize(): Promise { - this.user = await this.authService.getCurrentUserPromise(); this.corpusService.currentCorpus.subscribe(this.setCorpus.bind(this)); } diff --git a/frontend/src/mock-data/constructor-helpers.ts b/frontend/src/mock-data/constructor-helpers.ts index 21c4a1aa1..b369eb305 100644 --- a/frontend/src/mock-data/constructor-helpers.ts +++ b/frontend/src/mock-data/constructor-helpers.ts @@ -3,8 +3,10 @@ import { Corpus, FieldValues, FoundDocument, HighlightResult, SearchHit } from '../app/models'; import { mockCorpus } from './corpus'; import { TagServiceMock } from './tag'; +import { EntityServiceMock } from './entity'; const tagService = new TagServiceMock() as any; +const entityService = new EntityServiceMock() as any; export const makeDocument = ( fieldValues: FieldValues, @@ -16,6 +18,6 @@ export const makeDocument = ( const hit: SearchHit = { _id: id, _score: relevance, _source: fieldValues, highlight }; - return new FoundDocument(tagService, corpus, hit); + return new FoundDocument(tagService, entityService, corpus, hit); }; diff --git a/frontend/src/mock-data/corpus.ts b/frontend/src/mock-data/corpus.ts index f1472315d..ae6e1d1eb 100644 --- a/frontend/src/mock-data/corpus.ts +++ b/frontend/src/mock-data/corpus.ts @@ -145,6 +145,7 @@ export const mockCorpus: Corpus = { scanImageType: 'pdf', allowImageDownload: false, wordModelsPresent: false, + hasNamedEntities: false, directDownloadLimit: 500, fields: 
[mockField, mockField2], languages: ['English'], @@ -163,6 +164,7 @@ export const mockCorpus2 = { scanImageType: 'pdf', allowImageDownload: false, wordModelsPresent: false, + hasNamedEntities: true, directDownloadLimit: 1000, fields: [mockField2], languages: ['English', 'French'], @@ -181,6 +183,7 @@ export const mockCorpus3: Corpus = { scanImageType: 'pdf', allowImageDownload: false, wordModelsPresent: false, + hasNamedEntities: false, directDownloadLimit: 2000, fields: [mockField, mockField2, mockField3, mockFieldDate, mockFieldMultipleChoice], languages: ['English'], diff --git a/frontend/src/mock-data/entity.ts b/frontend/src/mock-data/entity.ts new file mode 100644 index 000000000..9452852ec --- /dev/null +++ b/frontend/src/mock-data/entity.ts @@ -0,0 +1,12 @@ +import { of, Observable } from 'rxjs'; + +import { Corpus, NamedEntitiesResult } from '../app/models'; + +export class EntityServiceMock { + + public getDocumentEntities(corpus: Corpus, id: string): Observable { + return of({speech: [{entity: 'person', text: 'Wally'}, + {entity: 'flat', text: ' was last seen in '}, + {entity: 'location', text: 'Paris'}]}) + } +} diff --git a/frontend/src/mock-data/search.ts b/frontend/src/mock-data/search.ts index d5c4f233b..68d6d720b 100644 --- a/frontend/src/mock-data/search.ts +++ b/frontend/src/mock-data/search.ts @@ -3,7 +3,7 @@ import { SearchFilter } from '../app/models/field-filter'; import { Corpus, CorpusField, FoundDocument, QueryModel, SearchResults } from '../app/models/index'; import { mockCorpus } from './corpus'; import { TagServiceMock } from './tag'; -import { TagService } from '../app/services/tag.service'; +import { ElasticSearchServiceMock } from './elastic-search'; import { Aggregator } from '../app/models/aggregation'; export class SearchServiceMock { @@ -39,7 +39,8 @@ export class SearchServiceMock { loadResults(queryModel: QueryModel, resultsParams: PageResultsParameters): Promise { const doc = new FoundDocument( - new TagServiceMock() as 
unknown as TagService, + new TagServiceMock() as any, + new ElasticSearchServiceMock() as any, mockCorpus, { _id: 'test_1', diff --git a/frontend/src/styles.scss b/frontend/src/styles.scss index b6a8d5d8b..942fe4314 100644 --- a/frontend/src/styles.scss +++ b/frontend/src/styles.scss @@ -71,3 +71,31 @@ a.dropdown-item[disabled] { color: $grey; } } + +@mixin mark-entity($color) { + background-color: rgb(from $color r g b /.2); + border-bottom: .2em solid; + border-color: $color; + + .entity-icon, &.entity-icon { + padding-left: .3em; + padding-right: .3em; + color: $color; + } +} + +.entity-person { + @include mark-entity($entity-person); +} + +.entity-location { + @include mark-entity($entity-location); +} + +.entity-organization { + @include mark-entity($entity-organization); +} + +.entity-miscellaneous { + @include mark-entity($entity-miscellaneous); +}