Skip to content

Commit

Permalink
Merge branch 'develop' into bugfix/nn-reset
Browse files Browse the repository at this point in the history
  • Loading branch information
JeltevanBoheemen authored Jul 5, 2024
2 parents e9ee10f + 6093a65 commit 20d25dc
Show file tree
Hide file tree
Showing 11 changed files with 105 additions and 40 deletions.
20 changes: 20 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,26 @@
"/*": "*",
"/./~/*": "${webRoot}/node_modules/*"
}
},
{
"name": "celery",
"type": "debugpy",
"request": "launch",
"cwd": "${workspaceFolder}/backend",
"env": {
"PYTHONPATH": "${workspaceFolder}/backend"
},
"module": "celery",
"console": "integratedTerminal",
"args": [
"-A",
"ianalyzer.celery",
"worker",
"--pool=solo",
"--concurrency=1",
"--events",
"--loglevel=info"
]
}
],
"inputs": [
Expand Down
6 changes: 5 additions & 1 deletion backend/addcorpus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,11 +448,12 @@ def clean(self):
e
])


class CorpusDocumentationPage(models.Model):
class PageType(models.TextChoices):
GENERAL = ('general', 'General information')
CITATION = ('citation', 'Citation')
LICENSE = ('license', 'Licence')
LICENSE = ('license', 'License')
TERMS_OF_SERVICE = ('terms_of_service', 'Terms of service')
WORDMODELS = ('wordmodels', 'Word models')

Expand All @@ -472,6 +473,9 @@ class PageType(models.TextChoices):
help_text='markdown contents of the documentation'
)

def __str__(self):
    '''
    Label for this page: the name of the corpus it belongs to, followed by
    the page type.
    '''
    return f'{self.corpus_configuration.corpus.name} - {self.type}'

class Meta:
constraints = [
UniqueConstraint(
Expand Down
7 changes: 6 additions & 1 deletion backend/addcorpus/tests/test_corpus_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@ def test_no_corpora(db, settings, admin_client):
def test_corpus_documentation_view(admin_client, basic_mock_corpus, settings):
response = admin_client.get(f'/api/corpus/documentation/{basic_mock_corpus}/')
assert response.status_code == 200
pages = response.data

# check that the pages are sorted in canonical order
page_types = [page['type'] for page in pages]
assert page_types == ['General information', 'Citation', 'License']

# should contain citation guidelines
citation_page = next(page for page in response.data if page['type'] == 'Citation')
citation_page = next(page for page in pages if page['type'] == 'Citation')

# check that the page template is rendered with context
content = citation_page['content']
Expand Down
18 changes: 13 additions & 5 deletions backend/addcorpus/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,30 @@ def send_corpus_file(corpus='', subdir='', filename=''):

return FileResponse(open(path, 'rb'))


class CorpusDocumentationPageViewset(viewsets.ModelViewSet):
permission_classes = [IsAuthenticatedOrReadOnly, CorpusAccessPermission]
serializer_class = CorpusDocumentationPageSerializer

def get_queryset(self):
corpus_name = corpus_name_from_request(self.request)
pages = CorpusDocumentationPage.objects.filter(corpus_configuration__corpus__name=corpus_name)

@staticmethod
def get_relevant_pages(pages, corpus_name):
    '''
    Select the documentation pages that apply to the corpus.

    The word models page is only kept when the corpus has a Python
    definition and that definition reports that word models are present.
    In every other case, pages of type `WORDMODELS` are excluded.

    `pages` needs to support `.exclude()` (presumably a queryset of
    `CorpusDocumentationPage` - confirm against callers).
    '''
    corpus = Corpus.objects.get(name=corpus_name)
    # the definition is only loaded for corpora with a Python definition
    has_word_models = (
        corpus.has_python_definition
        and load_corpus_definition(corpus_name).word_models_present
    )
    if has_word_models:
        return pages

    return pages.exclude(type=CorpusDocumentationPage.PageType.WORDMODELS)

def get_queryset(self):
    '''
    Return the relevant documentation pages of the requested corpus.

    The result is a list (not a queryset), sorted by the order in which
    the page types are declared in `CorpusDocumentationPage.PageType`.
    '''
    name = corpus_name_from_request(self.request)
    corpus_pages = CorpusDocumentationPage.objects.filter(
        corpus_configuration__corpus__name=name
    )
    shown_pages = self.get_relevant_pages(corpus_pages, name)
    type_order = [
        page_type.value for page_type in CorpusDocumentationPage.PageType
    ]

    def position(page):
        # rank of the page's type in the declaration order of PageType
        return type_order.index(page.type)

    return sorted(shown_pages, key=position)


class CorpusImageView(APIView):
'''
Expand Down
1 change: 1 addition & 0 deletions backend/corpora_test/basic/license/license.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Do whatever you please.
2 changes: 2 additions & 0 deletions backend/corpora_test/basic/mock_csv_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ class MockCSVCorpus(CSVCorpusDefinition):
max_date = datetime.datetime(year=2022, month=12, day=31)
data_directory = os.path.join(here, 'source_data')
citation_page = 'citation.md'
license_page = 'license.md'
description_page = 'mock-csv-corpus.md'

languages = ['en']
category = 'book'
Expand Down
2 changes: 2 additions & 0 deletions backend/corpora_test/media/media_mock_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ class MediaMockCorpus(MockCSVCorpus):
data_directory = os.path.join(here, 'source_data')
scan_image_type = 'image/png'
citation_page = None
license_page = None
description_page = None

def request_media(self, document, corpus_name):
field_values = document['fieldValues']
Expand Down
11 changes: 2 additions & 9 deletions backend/download/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,12 @@ def make_download(request_json, download_id, download_size=None):
es_query = api_query_to_es_query(request_json, corpus_name)
results, _total = es_download.scroll(
corpus_name, es_query, download_size)

filepath = create_csv.search_results_csv(
results, request_json['fields'], query, download_id)
results, request_json['fields'], query.get_query_text(es_query), download_id)
return filepath


def create_query(request_json):
    """
    Format the route of the search into a query string.

    Takes the third `/`-separated segment of `request_json['route']`,
    drops every `$`, and replaces each `;` and each `%` followed by
    digits with an underscore.
    """
    route = request_json.get('route')
    segment = route.split('/')[2]
    without_dollars = re.sub(r'\$', '', segment)
    return re.sub(r';|%\d+', '_', without_dollars)


def try_download(tasks_func, download):
'''
Try initialising a task chain for a download. Marks the download
Expand Down
19 changes: 19 additions & 0 deletions backend/download/tests/test_download_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,22 @@ def test_unauthenticated_download(db, client, basic_mock_corpus, basic_corpus_pu
download_objects = Download.objects.all()
assert download_objects.count() == 1
assert download_objects.first().user == None

def test_query_text_in_csv(db, client, basic_mock_corpus, basic_corpus_public, index_basic_mock_corpus):
    '''
    The CSV returned for a search results download should list the query
    text of the request in its `query` column.
    '''
    payload = {
        'corpus': basic_mock_corpus,
        'es_query': query.set_query_text(mock_match_all_query(), 'ghost'),
        'fields': ['character', 'line'],
        'route': f"/search/{basic_mock_corpus}",
        'encoding': 'utf-8',
    }
    response = client.post(
        '/api/download/search_results',
        payload,
        content_type='application/json',
    )
    assert status.is_success(response.status_code)

    # only the first data row is inspected
    csv_stream = read_file_response(response, 'utf-8')
    first_row = next(csv.DictReader(csv_stream, delimiter=';'))
    assert first_row['query'] == 'ghost'
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ import { APIEditableCorpus, CorpusDefinition } from '../../models/corpus-definit
import * as _ from 'lodash';
import { Router } from '@angular/router';
import { HttpErrorResponse } from '@angular/common/http';
import { Subject } from 'rxjs';

@Component({
selector: 'ia-create-definition',
templateUrl: './create-definition.component.html',
styleUrls: ['./create-definition.component.scss']
styleUrls: ['./create-definition.component.scss'],
})
export class CreateDefinitionComponent {
actionIcons = actionIcons;
Expand All @@ -19,6 +20,8 @@ export class CreateDefinitionComponent {

error: Error;

reset$ = new Subject<void>();

constructor(private apiService: ApiService, private router: Router) {
this.corpus = new CorpusDefinition(apiService);
}
Expand All @@ -31,12 +34,15 @@ export class CreateDefinitionComponent {
this.error = undefined;
this.corpus.save().subscribe(
(result: APIEditableCorpus) => {
this.router.navigate(['/corpus-definitions', 'edit', result.id]);
this.router.navigate([
'/corpus-definitions',
'edit',
result.id,
]);
},
(err: HttpErrorResponse) => {
this.error = err;
}
);
}

}
47 changes: 26 additions & 21 deletions frontend/src/app/visualization/barchart/histogram.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -213,28 +213,33 @@ export class HistogramComponent
formatDownload: this.formatDownloadValue,
isOptional: 'relative_doc_count' !== valueKey,
},
{
key: 'match_count',
label: 'Token Frequency',
format: this.formatValue('raw'),
formatDownload: this.formatDownloadValue,
isOptional: 'match_count' !== valueKey,
},
{
key: 'matches_by_doc_count',
label: 'Relative Frequency (documents)',
format: this.formatValue('documents'),
formatDownload: this.formatDownloadValue,
isOptional: 'matches_by_doc_count' !== valueKey,
},
{
key: 'matches_by_token_count',
label: 'Relative Frequency (terms)',
format: this.formatValue('terms'),
formatDownload: this.formatDownloadValue,
isOptional: 'matches_by_token_count' !== valueKey,
},
];
if (this.frequencyMeasure == 'tokens') {
// Headers related to tokens should not be applied to document visualizations
this.tableHeaders = this.tableHeaders.concat([
{
key: 'match_count',
label: 'Token Frequency',
format: this.formatValue('raw'),
formatDownload: this.formatDownloadValue,
isOptional: 'match_count' !== valueKey,
},
{
key: 'matches_by_doc_count',
label: 'Relative Frequency (documents)',
format: this.formatValue('documents'),
formatDownload: this.formatDownloadValue,
isOptional: 'matches_by_doc_count' !== valueKey,
},
{
key: 'matches_by_token_count',
label: 'Relative Frequency (terms)',
format: this.formatValue('terms'),
formatDownload: this.formatDownloadValue,
isOptional: 'matches_by_token_count' !== valueKey,
},
]);
}
}
}
}

0 comments on commit 20d25dc

Please sign in to comment.