diff --git a/cap/modules/deposit/api.py b/cap/modules/deposit/api.py index b366f5a76d..f148cafc83 100644 --- a/cap/modules/deposit/api.py +++ b/cap/modules/deposit/api.py @@ -61,6 +61,7 @@ from cap.modules.repos.tasks import download_repo, download_repo_file from cap.modules.repos.utils import (create_webhook, disconnect_subscriber, parse_git_url) +from cap.modules.services.serializers.zenodo import ZenodoUploadSchema from cap.modules.schemas.resolvers import (resolve_schema_by_url, schema_name_to_url) from cap.modules.user.errors import DoesNotExistInLDAP @@ -68,7 +69,7 @@ get_existing_or_register_user) from .errors import (DepositValidationError, UpdateDepositPermissionsError, - ReviewError) + ReviewError, InputValidationError) from .fetchers import cap_deposit_fetcher from .minters import cap_deposit_minter from .permissions import (AdminDepositPermission, CloneDepositPermission, @@ -269,12 +270,22 @@ def upload(self, pid, *args, **kwargs): 'Please connect your Zenodo account ' 'before creating a deposit.') - files = data.get('files') + files = data.get('files', []) bucket = data.get('bucket') - zenodo_data = data.get('zenodo_data', {}) + zenodo_data = data.get('zenodo_data') + + input = {'files': files, 'bucket': bucket} + if zenodo_data: + input['data'] = zenodo_data if files and bucket: - zenodo_deposit = create_zenodo_deposit(token, zenodo_data) # noqa + payload, errors = ZenodoUploadSchema().load(input) + if errors: + raise InputValidationError( + 'Validation error in Zenodo input data.', + errors=errors) + + zenodo_deposit = create_zenodo_deposit(token, payload) self.setdefault('_zenodo', []).append(zenodo_deposit) self.commit() diff --git a/cap/modules/deposit/errors.py b/cap/modules/deposit/errors.py index b1bdcc2e22..4dadefecf3 100644 --- a/cap/modules/deposit/errors.py +++ b/cap/modules/deposit/errors.py @@ -138,6 +138,21 @@ def __init__(self, description, errors=None, **kwargs): self.errors = [FieldError(e[0], e[1]) for e in errors.items()] +class InputValidationError(RESTValidationError): + """Review validation error exception.""" + + code = 400 + + description = "Validation error. Try again with valid data" + + def __init__(self, description, errors=None, **kwargs): + """Initialize exception.""" + super(InputValidationError, self).__init__(**kwargs) + + self.description = description or self.description + self.errors = [FieldError(e[0], e[1]) for e in errors.items()] + + class DataValidationError(RESTValidationError): """Review validation error exception.""" diff --git a/cap/modules/deposit/tasks.py b/cap/modules/deposit/tasks.py index 8c3f1e5201..21a052cc27 100644 --- a/cap/modules/deposit/tasks.py +++ b/cap/modules/deposit/tasks.py @@ -28,7 +28,6 @@ import requests from flask import current_app from celery import shared_task -from invenio_db import db from invenio_files_rest.models import FileInstance, ObjectVersion diff --git a/cap/modules/deposit/utils.py b/cap/modules/deposit/utils.py index 5e5aa88c49..9c4af3ec88 100644 --- a/cap/modules/deposit/utils.py +++ b/cap/modules/deposit/utils.py @@ -25,15 +25,16 @@ from __future__ import absolute_import, print_function +import json import requests from flask import current_app -from flask_login import current_user from invenio_access.models import Role from invenio_db import db from cap.modules.deposit.errors import AuthorizationError, \ DataValidationError, FileUploadError from cap.modules.records.utils import url_to_api_url +from cap.modules.services.serializers.zenodo import ZenodoDepositSchema def clean_empty_values(data): @@ -82,13 +83,16 @@ def add_api_to_links(links): return response -def create_zenodo_deposit(token, data): +def create_zenodo_deposit(token, data=None): """Create a Zenodo deposit using the logged in user's credentials.""" zenodo_url = current_app.config.get("ZENODO_SERVER_URL") + zenodo_data = data.get('data') + upload_data = {'metadata': zenodo_data} if zenodo_data else {} + deposit = requests.post( url=f'{zenodo_url}/deposit/depositions', params=dict(access_token=token), - json={'metadata': data}, + json=upload_data, headers={'Content-Type': 'application/json'} ) @@ -105,18 +109,5 @@ def create_zenodo_deposit(token, data): raise FileUploadError( 'Something went wrong, Zenodo deposit not created.') - # TODO: fix with serializers - data = deposit.json() - zenodo_deposit = { - 'id': data['id'], - 'title': data.get('metadata', {}).get('title'), - 'creator': current_user.id, - 'created': data['created'], - 'links': { - 'self': data['links']['self'], - 'bucket': data['links']['bucket'], - 'html': data['links']['html'], - 'publish': data['links']['publish'], - } - } - return zenodo_deposit + data = ZenodoDepositSchema().dump(deposit.json()).data + return data diff --git a/cap/modules/services/serializers/zenodo.py b/cap/modules/services/serializers/zenodo.py new file mode 100644 index 0000000000..f12c6076b2 --- /dev/null +++ b/cap/modules/services/serializers/zenodo.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Analysis Preservation Framework. +# Copyright (C) 2020 CERN. +# +# CERN Analysis Preservation Framework is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CERN Analysis Preservation Framework is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CERN Analysis Preservation Framework; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +# or submit itself to any jurisdiction. + +"""Zenodo Serializer/Validator.""" + +import arrow +from flask_login import current_user +from marshmallow import Schema, fields, ValidationError, validate, validates, \ + validates_schema + +from invenio_files_rest.models import ObjectVersion + +DATE_REGEX = r'\d{4}-\d{2}-\d{2}' +DATE_ERROR = 'The date should follow the pattern YYYY-mm-dd.' +CHOICE_ERROR = lambda choices: f'Not a valid choice. Select one of: {choices}' # noqa + +UPLOAD_TYPES = [ + 'publication', + 'poster', + 'presentation', + 'dataset', + 'image', + 'video', + 'software', + 'lesson', + 'physicalobject', + 'other' +] +LICENSES = [ + 'CC-BY-4.0', + 'CC-BY-1.0', + 'CC-BY-2.0', + 'CC-BY-3.0' +] +ACCESS_RIGHTS = [ + 'open', + 'embargoed', + 'restricted', + 'closed' +] + + +class ZenodoCreatorsSchema(Schema): + name = fields.String(required=True) + affiliation = fields.String() + orcid = fields.String() + + +class ZenodoDepositMetadataSchema(Schema): + title = fields.String(required=True) + description = fields.String(required=True) + version = fields.String() + + keywords = fields.List(fields.String()) + creators = fields.List( + fields.Nested(ZenodoCreatorsSchema), required=True) + + upload_type = fields.String(required=True, validate=validate.OneOf( + UPLOAD_TYPES, error=CHOICE_ERROR(UPLOAD_TYPES))) + license = fields.String(required=True, validate=validate.OneOf( + LICENSES, error=CHOICE_ERROR(LICENSES))) + access_right = fields.String(required=True, validate=validate.OneOf( + ACCESS_RIGHTS, error=CHOICE_ERROR(ACCESS_RIGHTS))) + + publication_date = fields.String( + required=True, validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR)) + embargo_date = fields.String( + validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR)) + access_conditions = fields.String() + + @validates('embargo_date') + def validate_embargo_date(self, value): + """Validate that embargo date is in the future.""" + if arrow.get(value).date() <= arrow.utcnow().date(): + raise ValidationError( + 'Embargo date must be in the future.', + field_names=['embargo_date'] + ) + + @validates_schema() + def validate_license(self, data, **kwargs): + """Validate license.""" + access = data.get('access_right') + if access in ['open', 'embargoed'] and 'license' not in data: + raise ValidationError( + 'Required when access right is open or embargoed.', + field_names=['license'] + ) + if access == 'embargoed' and 'embargo_date' not in data: + raise ValidationError( + 'Required when access right is embargoed.', + field_names=['embargo_date'] + ) + if access == 'restricted' and 'access_conditions' not in data: + raise ValidationError( + 'Required when access right is restricted.', + field_names=['access_conditions'] + ) + + +class ZenodoUploadSchema(Schema): + files = fields.List(fields.String(), required=True) + data = fields.Nested(ZenodoDepositMetadataSchema, default=dict()) + bucket = fields.String(required=True) + + @validates_schema() + def validate_files(self, data, **kwargs): + bucket = data['bucket'] + files = data['files'] + + for _file in files: + obj = ObjectVersion.get(bucket, _file) + if not obj: + raise ValidationError( + f'File {_file} not found in bucket.', + field_names=['files'] + ) + + +class ZenodoDepositSchema(Schema): + id = fields.Int(dump_only=True) + created = fields.String(dump_only=True) + + title = fields.Method('get_title', dump_only=True, allow_none=True) + creator = fields.Method('get_creator', dump_only=True, allow_none=True) + links = fields.Method('get_links', dump_only=True) + + def get_creator(self, data): + return current_user.id if current_user else None + + def get_title(self, data): + return data.get('metadata', {}).get('title') + + def get_links(self, data): + return { + 'self': data['links']['self'], + 'bucket': data['links']['bucket'], + 'html': data['links']['html'], + 'publish': data['links']['publish'] + } diff --git a/tests/integration/test_zenodo_upload.py b/tests/integration/test_zenodo_upload.py index 2591c38ca6..c202ff3caf 100644 --- a/tests/integration/test_zenodo_upload.py +++ b/tests/integration/test_zenodo_upload.py @@ -173,7 +173,15 @@ def test_create_and_upload_to_zenodo_with_data(mock_token, app, users, deposit_w files=['test-file.txt'], zenodo_data={ 'title': 'test-title', - 'description': 'This is my first upload' + 'description': 'This is my first upload', + 'upload_type': 'poster', + 'creators': [ + {'name': 'User Tester', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2050-09-09' })), headers=headers) assert resp.status_code == 201 @@ -191,7 +199,6 @@ def test_create_and_upload_to_zenodo_with_data(mock_token, app, users, deposit_w @patch('cap.modules.deposit.api._fetch_token', return_value='test-token') -@responses.activate def test_create_deposit_with_wrong_data(mock_token, app, users, deposit_with_file, auth_headers_for_user, json_headers): user = users['cms_user'] @@ -200,26 +207,79 @@ def test_create_deposit_with_wrong_data(mock_token, app, users, deposit_with_fil pid = deposit_with_file['_deposit']['id'] bucket = deposit_with_file.files.bucket + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt', 'not-found.txt'], + zenodo_data={ + 'description': 'This is my first upload', + 'creators': [ + {'name': 'User Tester', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20' + })), + headers=headers) + assert resp.status_code == 400 + assert resp.json['message'] == 'Validation error in Zenodo input data.' + assert resp.json['errors'] == [{ + 'field': 'data', + 'message': { + 'upload_type': ['Missing data for required field.'], + 'title': ['Missing data for required field.']} + }, { + 'field': 'files', + 'message': ['File not-found.txt not found in bucket.'] + }] + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_create_and_upload_to_zenodo_with_wrong_files(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + # MOCK RESPONSES FROM ZENODO SERVER + # first the deposit creation responses.add(responses.POST, f'{zenodo_server_url}/deposit/depositions', json={ - 'status': 400, - 'message': 'Validation error.', - 'errors': [ - {'field': 'test', 'message': 'Unknown field name.'} - ]}, - status=400) + 'id': 111, + 'record_id': 111, + 'title': '', + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [], + 'created': '2020-11-20T11:49:39.147767+00:00' + }, + status=200) + # create the zenodo deposit with app.test_client() as client: resp = client.post(f'/deposits/{pid}/actions/upload', data=json.dumps(dict(target='zenodo', bucket=str(bucket), - files=['test-file.txt'], - zenodo_data={'test': 'test'})), + files=['test-file.txt', 'not-exists.txt'])), headers=headers) + assert resp.status_code == 400 - assert resp.json['message'] == 'Validation error on creating the Zenodo deposit.' - assert resp.json['errors'] == [{'field': 'test', 'message': 'Unknown field name.'}] + assert resp.json['message'] == 'Validation error in Zenodo input data.' + assert resp.json['errors'] == [{ + 'field': 'files', + 'message': ['File not-exists.txt not found in bucket.'] + }] + + @patch('cap.modules.deposit.api._fetch_token', return_value='test-token') diff --git a/tests/unit/schemas/test_zenodo_serializers.py b/tests/unit/schemas/test_zenodo_serializers.py new file mode 100644 index 0000000000..96d14cd9a2 --- /dev/null +++ b/tests/unit/schemas/test_zenodo_serializers.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Analysis Preservation Framework. +# Copyright (C) 2020 CERN. +# +# CERN Analysis Preservation Framework is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CERN Analysis Preservation Framework is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CERN Analysis Preservation Framework; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +# or submit itself to any jurisdiction. + +"""Zenodo upload serializers.""" + +from cap.modules.services.serializers.zenodo import ZenodoUploadSchema, ZenodoDepositSchema + + +def test_zenodo_upload_serializer(app, deposit_with_file): + bucket = deposit_with_file.files.bucket + + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt'], + 'bucket': str(bucket) + } + payload, errors = ZenodoUploadSchema().load(data) + assert errors == {} + assert payload == data + + # not existing files + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt', 'no-file.txt'], + 'bucket': str(bucket) + } + payload, errors = ZenodoUploadSchema().load(data) + assert errors == {'files': ['File no-file.txt not found in bucket.']} + + # missing required fields + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt'], + 'bucket': str(bucket) + } + payload, errors = ZenodoUploadSchema().load(data) + assert errors == { + 'data': { + 'upload_type': ['Missing data for required field.'], + 'description': ['Missing data for required field.']} + } + + # embargo date in the past + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2015-09-09' + }, + 'files': ['test-file.txt'], + 'bucket': str(bucket) + } + payload, errors = ZenodoUploadSchema().load(data) + assert errors == { + 'data': { + 'embargo_date': ['Embargo date must be in the future.'] + }} + + # malformed dates + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11', + 'embargo_date': '2015-01' + }, + 'files': ['test-file.txt'], + 'bucket': str(bucket) + } + payload, errors = ZenodoUploadSchema().load(data) + assert errors == { + 'data': { + 'publication_date': ['The date should follow the pattern YYYY-mm-dd.'], + 'embargo_date': ['The date should follow the pattern YYYY-mm-dd.'] + }} + + # wrong enum in license/upload/access + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'test', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'test', + 'license': 'test', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt'], + 'bucket': str(bucket) + } + payload, errors = ZenodoUploadSchema().load(data) + assert errors == { + 'data': { + 'license': ["Not a valid choice. Select one of: ['CC-BY-4.0', 'CC-BY-1.0', 'CC-BY-2.0', 'CC-BY-3.0']"], + 'access_right': ["Not a valid choice. Select one of: ['open', 'embargoed', 'restricted', 'closed']"], + 'upload_type': ["Not a valid choice. Select one of: ['publication', 'poster', " + "'presentation', 'dataset', 'image', 'video', 'software', " + "'lesson', 'physicalobject', 'other']"] + } + } + + # access conditional + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'restricted', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt'], + 'bucket': str(bucket) + } + payload, errors = ZenodoUploadSchema().load(data) + assert errors == { + 'data': { + 'access_conditions': ['Required when access right is restricted.'] + }} + + +def test_zenodo_deposit_serializer(): + payload = { + 'id': 111, + 'record_id': 111, + 'metadata': { + 'title': 'test' + }, + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [], + 'created': '2020-11-20T11:49:39.147767+00:00' + } + + data = ZenodoDepositSchema().dump(payload).data + assert data['id'] == 111 + assert data['title'] == 'test' + assert data['creator'] is None