Skip to content

Commit

Permalink
Merge branch 'develop' into feature/manual-navigation
Browse files Browse the repository at this point in the history
  • Loading branch information
lukavdplas authored Jun 12, 2024
2 parents f1b4c9b + 61f5378 commit c1fc23b
Show file tree
Hide file tree
Showing 46 changed files with 3,548 additions and 3,333 deletions.
1 change: 1 addition & 0 deletions .nvmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
18.17.1
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ keywords:
- elasticsearch
- natural language processing
license: MIT
version: 5.6.2
date-released: '2024-05-06'
version: 5.7.0
date-released: '2024-06-5'
2 changes: 1 addition & 1 deletion backend/corpora/dutchannualreports/dutchannualreports.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def sources(self, start=min_date, end=max_date):
full_path = op.join(directory, filename)
file_path = op.join(rel_dir, filename)
image_path = op.join(
rel_dir, name + '.' + self.scan_image_type)
rel_dir, name + '.pdf')
if extension != '.xml':
logger.debug(self.non_xml_msg.format(full_path))
continue
Expand Down
2 changes: 1 addition & 1 deletion backend/corpora/dutchnewspapers/dutchnewspapers_public.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def sources(self, start=min_date, end=max_date):
self.definition_pattern.search(filename)), None)
if not definition_file:
continue
meta_dict = self.metadata_from_xml(definition_file, tags=[
meta_dict = self._metadata_from_xml(definition_file, tags=[
"title",
"date",
"publisher",
Expand Down
2 changes: 1 addition & 1 deletion backend/corpora/ecco/ecco.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def sources(self, start=min_date, end=max_date):
'Volume'
]

meta_dict = self.metadata_from_xml(
meta_dict = self._metadata_from_xml(
full_path, tags=meta_tags)
meta_dict['id'] = record_id
meta_dict['category'] = category
Expand Down
2 changes: 1 addition & 1 deletion backend/media/image_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def retrieve_pdf(path):
'''
Retrieve the pdf as a file object.
'''
pdf = PdfReader(path, 'rb')
pdf = PdfReader(path)

return pdf

Expand Down
2 changes: 1 addition & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ threadpoolctl==3.2.0
# via scikit-learn
tomli==2.0.1
# via pytest
tornado==6.3.3
tornado==6.4.1
# via
# django-livereload-server
# flower
Expand Down
14 changes: 10 additions & 4 deletions documentation/Django-project-settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,17 @@ The values in the dictionary give specifications.
- `'scroll_timeout'`: Time before scroll results time out
- `'scroll_page_size'`: Number of results per scroll page

The following optional settings are implemented but have no documentation:
### API key

- `'certs_location'`
- `'api_key'`
- `'api_id'`
By default, an elasticsearch server will have security features enabled; you can turn this off for a local development server (see [first-time setup](./First-time-setup.md)). Otherwise, the server configuration must specify an API key.

Create an API key for the server: see [creating an API key](https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html). Note down the `'id'` and `'api_key'` values of the response.

Add the following values to the configuration:

- `'certs_location'`: Fill in the following path: `{your_elasticsearch_directory}/config/certs/http_ca.crt`
- `'api_id'`: the ID of the API key
- `'api_key'`: the generated API key


#### Setting a default server
Expand Down
42 changes: 13 additions & 29 deletions documentation/First-time-setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ These are instructions to set up an I-analyzer server. If you are going to devel
* Python == 3.9
* PostgreSQL >= 10, client, server and C libraries
* [ElasticSearch](https://www.elastic.co/) 8. To avoid a lot of errors, choose the option: install elasticsearch with .zip or .tar.gz. ES will install everything in one folder, and not all over your machine, which happens with other options.
* [Redis](https://www.redis.io/) (used by [Celery](http://www.celeryproject.org/)). Recommended installation is [installing from source](https://redis.io/docs/getting-started/installation/install-redis-from-source/)
* Yarn
* [Redis](https://www.redis.io/). Recommended installation is [installing from source](https://redis.io/docs/getting-started/installation/install-redis-from-source/)
* [Node.js](https://nodejs.org/). See [.nvmrc](/.nvmrc) for the recommended version.
* [Yarn](https://yarnpkg.com/)

The documentation includes a [recipe for installing the prerequisites on Debian 10](./documentation/Local-Debian-I-Analyzer-setup.md)

Expand All @@ -18,35 +19,18 @@ For the SAML integration, the following libraries are required: xmlsec, python3-

To get an instance running, do all of the following inside an activated `virtualenv`:

1. Install the ElasticSearch v.8 (https://www.elastic.co/) and postgreSQL on the server or your local machine. To avoid a lot of errors, choose the option: install elasticsearch with .zip or .tar.gz. ES wil install everything in one folder, and not all over your machine, which happens with other options.
2. For an easy setup, locate the file `config/elasticsearch.yaml` in your Elasticsearch directory, and set the variable `xpack.security.enabled: false`. Alternatively, you can leave this on its default value(`true`), but then you need to [generate API keys](https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html) and set up your SERVERS config like so:
```
SERVERS = {
# Default ElasticSearch server
'default': {
'host': 'localhost',
'port': 9200,
'certs_location': '{your_elasticsearch_directory/config/certs/http_ca.crt}'
'api_id': '{generated_api_id}'
'api_key': '{generated_api_key}'
}
}
```
3. Start your ElasticSearch Server. Make sure cross-origin handling (the setting [http.cors.enabled](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html)) is set up correctly, or a proxy has been configured, for the server to be accessible by the web user. For example, edit `elasticsearch.yml` to include the following:
```
http.cors.enabled: true
http.cors.allow-origin: "*"
```
4. Create and activate a virtualenv for Python.
5. Create the file `backend/ianalyzer/settings_local.py`.`ianalyzer/settings_local.py` is included in .gitignore and thus not cloned to your machine. It can be used to customise your environment, and to include the corpora that are defined in the source code in your environment. See instructions of adding corpora below.
6. Install the requirements for both the backend and frontend:
```
1. Create the file `backend/ianalyzer/settings_local.py`. `ianalyzer/settings_local.py` is included in .gitignore and thus not cloned to your machine. It can be used to customise your environment. You can leave the file empty for now.
2. Install the requirements for both the backend and frontend:
```sh
yarn postinstall
```
7. Set up your postgres database by going to the backend directory and running `psql -f create_db.sql`
The backend readme provides more details on these steps.
8. Set up the database and migrations by running `yarn django migrate`.
9. Make a superuser account with `yarn django createsuperuser`
3. For an easy setup, locate the file `config/elasticsearch.yml` in your Elasticsearch directory, and set the variable `xpack.security.enabled: false`. Alternatively, you can leave this on its default value (`true`), but this requires [additional settings](./Django-project-settings.md#api-key).
4. Set up your postgres database:
```sh
psql -f backend/create_db.sql
yarn django migrate
```
5. Make a superuser account with `yarn django createsuperuser`

## Setup with Docker
Alternatively, you can run the application via Docker:
Expand Down
13 changes: 9 additions & 4 deletions frontend/.browserslistrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
# For additional information regarding the format and rule options, please see:
# https://github.com/browserslist/browserslist#queries

# For the full list of supported browsers by the Angular framework, please see:
# https://angular.io/guide/browser-support

# You can see what browsers were selected by your queries by running:
# npx browserslist

> 0.5%
last 2 versions
defaults
last 2 Chrome versions
last 1 Firefox version
last 2 Edge major versions
last 2 Safari major versions
last 2 iOS major versions
Firefox ESR
not dead
not IE 9-11 # For IE 9-11 support, remove 'not'.
125 changes: 58 additions & 67 deletions frontend/.eslintrc.json
Original file line number Diff line number Diff line change
@@ -1,69 +1,60 @@
{
"root": true,
"ignorePatterns": [
"projects/**/*"
],
"overrides": [
{
"files": [
"*.ts"
],
"parserOptions": {
"project": ["tsconfig.json"],
"createDefaultProgram": true
},
"extends": [
"plugin:@angular-eslint/ng-cli-compat",
"plugin:@angular-eslint/ng-cli-compat--formatting-add-on",
"plugin:@angular-eslint/template/process-inline-templates"
],
"rules": {
"@angular-eslint/no-output-on-prefix": "off",
"@angular-eslint/component-selector": [
"error",
{
"type": ["element", "attribute"],
"prefix": "ia",
"style": "kebab-case"
}
],
"@angular-eslint/directive-selector": [
"error",
{
"prefix": "ia"
}
],
"@typescript-eslint/member-ordering": "warn",
"@typescript-eslint/naming-convention": [
"warn",
{
"selector": "objectLiteralProperty",
"format": ["camelCase", "snake_case", "PascalCase"],
"leadingUnderscore": "allow"
}
],
"@typescript-eslint/consistent-type-definitions": "error",
"@typescript-eslint/dot-notation": "off",
"id-blacklist": "off",
"no-underscore-dangle": "off",
"quotes": [
"warn",
"single",
{
"avoidEscape": true,
"allowTemplateLiterals": true
}
]
}
},
{
"files": [
"*.html"
],
"extends": [
"plugin:@angular-eslint/template/recommended"
],
"rules": {}
}
]
"root": true,
"ignorePatterns": ["projects/**/*"],
"plugins": ["@angular-eslint", "@typescript-eslint"],
"overrides": [
{
"files": ["*.ts"],
"parserOptions": {
"project": ["tsconfig.json"],
"createDefaultProgram": true
},
"extends": [
"plugin:@angular-eslint/template/process-inline-templates"
],
"rules": {
"@angular-eslint/no-output-on-prefix": "off",
"@angular-eslint/component-selector": [
"error",
{
"type": ["element", "attribute"],
"prefix": "ia",
"style": "kebab-case"
}
],
"@angular-eslint/directive-selector": [
"error",
{
"prefix": "ia"
}
],
"@typescript-eslint/member-ordering": "warn",
"@typescript-eslint/naming-convention": [
"warn",
{
"selector": "objectLiteralProperty",
"format": ["camelCase", "snake_case", "PascalCase"],
"leadingUnderscore": "allow"
}
],
"@typescript-eslint/consistent-type-definitions": "error",
"@typescript-eslint/dot-notation": "off",
"id-blacklist": "off",
"no-underscore-dangle": "off",
"quotes": [
"warn",
"single",
{
"avoidEscape": true,
"allowTemplateLiterals": true
}
]
}
},
{
"files": ["*.html"],
"extends": ["plugin:@angular-eslint/template/recommended"],
"rules": {}
}
]
}
Loading

0 comments on commit c1fc23b

Please sign in to comment.