From 99200b1cc59c3b73efdb713085d90f5e5985d100 Mon Sep 17 00:00:00 2001 From: Ilias Koutsakis Date: Tue, 17 Nov 2020 20:03:24 +0100 Subject: [PATCH 1/3] services: create zenodo deposit through CAP * updates the config, each user has access to their Zenod ccount/token * creates deposit, with metadata * uploads files to deposit * integration tests * closes #1938 * closes #1934 Signed-off-by: Ilias Koutsakis --- cap/config.py | 5 +- cap/modules/deposit/api.py | 122 +++++--- cap/modules/deposit/errors.py | 27 ++ cap/modules/deposit/tasks.py | 56 ++++ cap/modules/deposit/utils.py | 45 +++ cap/modules/services/views/zenodo.py | 27 -- docker-services.yml | 2 + tests/conftest.py | 19 +- tests/integration/test_zenodo_upload.py | 398 ++++++++++++++++++++++++ 9 files changed, 624 insertions(+), 77 deletions(-) create mode 100644 cap/modules/deposit/tasks.py create mode 100644 tests/integration/test_zenodo_upload.py diff --git a/cap/config.py b/cap/config.py index 088981fdfc..de5b362328 100644 --- a/cap/config.py +++ b/cap/config.py @@ -720,10 +720,7 @@ def _(x): # Zenodo # ====== -ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL', - 'https://zenodo.org/api') - -ZENODO_ACCESS_TOKEN = os.environ.get('APP_ZENODO_ACCESS_TOKEN', 'CHANGE_ME') +ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL', 'https://zenodo.org/api') # noqa # Endpoints # ========= diff --git a/cap/modules/deposit/api.py b/cap/modules/deposit/api.py index 042bb6d059..b366f5a76d 100644 --- a/cap/modules/deposit/api.py +++ b/cap/modules/deposit/api.py @@ -49,6 +49,7 @@ from sqlalchemy.orm.exc import NoResultFound from werkzeug.local import LocalProxy +from cap.modules.auth.ext import _fetch_token from cap.modules.deposit.errors import DisconnectWebhookError, FileUploadError from cap.modules.deposit.validators import NoRequiredValidator from cap.modules.experiments.permissions import exp_need_factory @@ -76,6 +77,8 @@ UpdateDepositPermission) from .review import Reviewable +from .tasks import upload_to_zenodo +from .utils import create_zenodo_deposit _datastore = LocalProxy(lambda: current_app.extensions['security'].datastore) @@ -256,53 +259,82 @@ def upload(self, pid, *args, **kwargs): _, rec = request.view_args.get('pid_value').data record_uuid = str(rec.id) data = request.get_json() - webhook = data.get('webhook', False) - event_type = data.get('event_type', 'release') - - try: - url = data['url'] - except KeyError: - raise FileUploadError('Missing url parameter.') - - try: - host, owner, repo, branch, filepath = parse_git_url(url) - api = create_git_api(host, owner, repo, branch, - current_user.id) - - if filepath: - if webhook: - raise FileUploadError( - 'You cannot create a webhook on a file') - - download_repo_file( - record_uuid, - f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa - *api.get_file_download(filepath), - api.auth_headers, - ) - elif webhook: - if event_type == 'release': - if branch: - raise FileUploadError( - 'You cannot create a release webhook' - ' for a specific branch or sha.') + target = data.get('target') + + if target == 'zenodo': + # check for token + token = _fetch_token('zenodo') + if not token: + raise FileUploadError( + 'Please connect your Zenodo account ' + 'before creating a deposit.') + + files = data.get('files') + bucket = data.get('bucket') + zenodo_data = data.get('zenodo_data', {}) + + if files and bucket: + zenodo_deposit = create_zenodo_deposit(token, zenodo_data) # noqa + self.setdefault('_zenodo', []).append(zenodo_deposit) + self.commit() + + # upload files to zenodo deposit + upload_to_zenodo.delay( + files, bucket, token, + zenodo_deposit['id'], + zenodo_deposit['links']['bucket']) + else: + raise FileUploadError( + 'You cannot create an empty Zenodo deposit. ' + 'Please add some files.') + else: + webhook = data.get('webhook', False) + event_type = data.get('event_type', 'release') - if event_type == 'push' and \ - api.branch is None and api.sha: - raise FileUploadError( - 'You cannot create a push webhook' - ' for a specific sha.') + try: + url = data['url'] + except KeyError: + raise FileUploadError('Missing url parameter.') - create_webhook(record_uuid, api, event_type) - else: - download_repo.delay( - record_uuid, - f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa - api.get_repo_download(), - api.auth_headers) - - except GitError as e: - raise FileUploadError(str(e)) + try: + host, owner, repo, branch, filepath = parse_git_url(url) # noqa + api = create_git_api(host, owner, repo, branch, + current_user.id) + + if filepath: + if webhook: + raise FileUploadError( + 'You cannot create a webhook on a file') + + download_repo_file( + record_uuid, + f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa + *api.get_file_download(filepath), + api.auth_headers, + ) + elif webhook: + if event_type == 'release': + if branch: + raise FileUploadError( + 'You cannot create a release webhook' + ' for a specific branch or sha.') + + if event_type == 'push' and \ + api.branch is None and api.sha: + raise FileUploadError( + 'You cannot create a push webhook' + ' for a specific sha.') + + create_webhook(record_uuid, api, event_type) + else: + download_repo.delay( + record_uuid, + f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa + api.get_repo_download(), + api.auth_headers) + + except GitError as e: + raise FileUploadError(str(e)) return self diff --git a/cap/modules/deposit/errors.py b/cap/modules/deposit/errors.py index bd48431dcc..b1bdcc2e22 100644 --- a/cap/modules/deposit/errors.py +++ b/cap/modules/deposit/errors.py @@ -87,6 +87,18 @@ def __init__(self, description, **kwargs): self.description = description or self.description +class AuthorizationError(RESTException): + """Exception during authorization.""" + + code = 401 + + def __init__(self, description, **kwargs): + """Initialize exception.""" + super(AuthorizationError, self).__init__(**kwargs) + + self.description = description or self.description + + class DisconnectWebhookError(RESTException): """Exception during disconnecting webhook for analysis.""" @@ -124,3 +136,18 @@ def __init__(self, description, errors=None, **kwargs): self.description = description or self.description self.errors = [FieldError(e[0], e[1]) for e in errors.items()] + + +class DataValidationError(RESTValidationError): + """Review validation error exception.""" + + code = 400 + + description = "Validation error. Try again with valid data" + + def __init__(self, description, errors=None, **kwargs): + """Initialize exception.""" + super(DataValidationError, self).__init__(**kwargs) + + self.description = description or self.description + self.errors = [FieldError(e['field'], e['message']) for e in errors] diff --git a/cap/modules/deposit/tasks.py b/cap/modules/deposit/tasks.py new file mode 100644 index 0000000000..8c3f1e5201 --- /dev/null +++ b/cap/modules/deposit/tasks.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Analysis Preservation Framework. +# Copyright (C) 2018 CERN. +# +# CERN Analysis Preservation Framework is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CERN Analysis Preservation Framework is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CERN Analysis Preservation Framework; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +"""Tasks.""" + +from __future__ import absolute_import, print_function + +import requests +from flask import current_app +from celery import shared_task +from invenio_db import db +from invenio_files_rest.models import FileInstance, ObjectVersion + + +@shared_task(autoretry_for=(Exception, ), + retry_kwargs={ + 'max_retries': 5, + 'countdown': 10 + }) +def upload_to_zenodo(files, bucket, token, zenodo_depid, zenodo_bucket_url): + """Upload to Zenodo the files the user selected.""" + for filename in files: + file_obj = ObjectVersion.get(bucket, filename) + file_ins = FileInstance.get(file_obj.file_id) + + with open(file_ins.uri, 'rb') as fp: + file = requests.put( + url=f'{zenodo_bucket_url}/{filename}', + data=fp, + params=dict(access_token=token), + ) + + if not file.ok: + current_app.logger.error( + f'Uploading file {filename} to deposit {zenodo_depid} ' + f'failed with {file.status_code}.') diff --git a/cap/modules/deposit/utils.py b/cap/modules/deposit/utils.py index 6e551cdb52..5e5aa88c49 100644 --- a/cap/modules/deposit/utils.py +++ b/cap/modules/deposit/utils.py @@ -25,9 +25,14 @@ from __future__ import absolute_import, print_function +import requests +from flask import current_app +from flask_login import current_user from invenio_access.models import Role from invenio_db import db +from cap.modules.deposit.errors import AuthorizationError, \ + DataValidationError, FileUploadError from cap.modules.records.utils import url_to_api_url @@ -75,3 +80,43 @@ def add_api_to_links(links): item['links'] = add_api_to_links(item.get('links')) return response + + +def create_zenodo_deposit(token, data): + """Create a Zenodo deposit using the logged in user's credentials.""" + zenodo_url = current_app.config.get("ZENODO_SERVER_URL") + deposit = requests.post( + url=f'{zenodo_url}/deposit/depositions', + params=dict(access_token=token), + json={'metadata': data}, + headers={'Content-Type': 'application/json'} + ) + + if not deposit.ok: + if deposit.status_code == 401: + raise AuthorizationError( + 'Authorization to Zenodo failed. Please reconnect.') + if deposit.status_code == 400: + data = deposit.json() + if data.get('message') == 'Validation error.': + raise DataValidationError( + 'Validation error on creating the Zenodo deposit.', + errors=data.get('errors')) + raise FileUploadError( + 'Something went wrong, Zenodo deposit not created.') + + # TODO: fix with serializers + data = deposit.json() + zenodo_deposit = { + 'id': data['id'], + 'title': data.get('metadata', {}).get('title'), + 'creator': current_user.id, + 'created': data['created'], + 'links': { + 'self': data['links']['self'], + 'bucket': data['links']['bucket'], + 'html': data['links']['html'], + 'publish': data['links']['publish'], + } + } + return zenodo_deposit diff --git a/cap/modules/services/views/zenodo.py b/cap/modules/services/views/zenodo.py index 99f8d78a24..7830cc957b 100644 --- a/cap/modules/services/views/zenodo.py +++ b/cap/modules/services/views/zenodo.py @@ -27,7 +27,6 @@ import requests from flask import current_app, jsonify -from invenio_files_rest.models import FileInstance, ObjectVersion from . import blueprint @@ -48,29 +47,3 @@ def get_zenodo_record(zenodo_id): """Get record from zenodo (route).""" resp, status = _get_zenodo_record(zenodo_id) return jsonify(resp), status - - -@blueprint.route('/zenodo//') -def upload_to_zenodo(bucket_id, filename): - """Upload code to zenodo.""" - zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') - params = {"access_token": current_app.config.get( - 'ZENODO_ACCESS_TOKEN')} - filename = filename + '.tar.gz' - - r = requests.post(zenodo_server_url, - params=params, json={}, - ) - - file_obj = ObjectVersion.get(bucket_id, filename) - file = FileInstance.get(file_obj.file_id) - - bucket_url = r.json()['links']['bucket'] - with open(file.uri, 'rb') as fp: - response = requests.put( - bucket_url + '/{}'.format(filename), - data=fp, - params=params, - ) - - return jsonify({"status": response.status_code}) diff --git a/docker-services.yml b/docker-services.yml index 670af14627..729dae948e 100644 --- a/docker-services.yml +++ b/docker-services.yml @@ -27,6 +27,8 @@ services: - "INVENIO_RATELIMIT_STORAGE_URL=redis://cache:6379/3" - "INVENIO_CERN_APP_CREDENTIALS_KEY=CHANGE_ME" - "INVENIO_CERN_APP_CREDENTIALS_SECRET=CHANGE_ME" + - "INVENIO_ZENODO_CLIENT_ID=CHANGE_ME" + - "INVENIO_ZENODO_CLIENT_SECRET=CHANGE_ME" - "DEV_HOST=CHANGE_ME" lb: build: ./docker/haproxy/ diff --git a/tests/conftest.py b/tests/conftest.py index 73546b75fb..1d9290b79d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,6 +28,7 @@ import tempfile from datetime import datetime, timedelta from uuid import uuid4 +from six import BytesIO import pytest from flask import current_app @@ -129,7 +130,8 @@ def default_config(): MAIL_DEFAULT_SENDER="analysis-preservation-support@cern.ch", CMS_STATS_COMMITEE_AND_PAGS={'key': {'contacts': []}}, PDF_FORUM_MAIL='pdf-forum-test@cern0.ch', - CONVENERS_ML_MAIL='ml-conveners-test@cern0.ch') + CONVENERS_ML_MAIL='ml-conveners-test@cern0.ch', + ZENODO_SERVER_URL='https://zenodo-test.org') @pytest.fixture(scope='session') @@ -422,6 +424,21 @@ def deposit(example_user, create_deposit): ) +@pytest.fixture +def deposit_with_file(example_user, create_schema, create_deposit): + """New deposit with files.""" + create_schema('test-schema', experiment='CMS') + return create_deposit( + example_user, + 'test-schema', + { + '$ana_type': 'test-schema', + 'title': 'test title' + }, + files={'test-file.txt': BytesIO(b'Hello world!')}, + experiment='CMS') + + @pytest.fixture def record(example_user, create_deposit): """Example record.""" diff --git a/tests/integration/test_zenodo_upload.py b/tests/integration/test_zenodo_upload.py new file mode 100644 index 0000000000..2591c38ca6 --- /dev/null +++ b/tests/integration/test_zenodo_upload.py @@ -0,0 +1,398 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Analysis Preservation Framework. +# Copyright (C) 2020 CERN. +# +# CERN Analysis Preservation Framework is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CERN Analysis Preservation Framework is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CERN Analysis Preservation Framework; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +# or submit itself to any jurisdiction. + +"""Integration tests for Zenodo Upload API.""" +import json +import re +from flask import current_app +from invenio_pidstore.resolver import Resolver + +import responses +from mock import patch + +from cap.modules.deposit.api import CAPDeposit + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_create_and_upload_to_zenodo(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + # MOCK RESPONSES FROM ZENODO SERVER + # first the deposit creation + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={ + 'id': 111, + 'record_id': 111, + 'title': '', + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [], + 'created': '2020-11-20T11:49:39.147767+00:00' + }, + status=200) + + # then the file upload + responses.add(responses.PUT, + 'http://zenodo-test.com/test-bucket/test-file.txt', + json={ + 'mimetype': 'application/octet-stream', + 'links': { + 'self': 'https://sandbox.zenodo.org/api/files/test-bucket/test-file.txt', + 'uploads': 'https://sandbox.zenodo.org/api/files/test-bucket/test-file.txt?uploads' + }, + 'key': 'test-file.txt', + 'size': 100 + }, + status=200) + + # fix because responses makes request to ES, and deposit.commit() won't work without it + responses.add_callback( + responses.PUT, + re.compile(r'http://localhost:9200/deposits-records-test-schema-v1\.0\.0/' + r'test-schema-v1\.0\.0/(.*)?version=(.*)&version_type=external_gte'), + callback=lambda req: (200, {}, json.dumps({})), + content_type='application/json', + ) + + # create the zenodo deposit + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 201 + + resolver = Resolver(pid_type='depid', + object_type='rec', + getter=lambda x: x) + _, uuid = resolver.resolve(pid) + record = CAPDeposit.get_record(uuid) + + assert len(record['_zenodo']) == 1 + assert record['_zenodo'][0]['id'] == 111 + assert record['_zenodo'][0]['title'] == None + assert record['_zenodo'][0]['created'] == '2020-11-20T11:49:39.147767+00:00' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_create_and_upload_to_zenodo_with_data(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + # MOCK RESPONSES FROM ZENODO SERVER + # first the deposit creation + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={ + 'id': 111, + 'record_id': 111, + 'submitted': False, + 'title': '', + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'metadata': { + 'description': 'This is my first upload', + 'title': 'test-title' + }, + 'files': [], + 'created': '2020-11-20T11:49:39.147767+00:00' + }, + status=200) + + # then the file upload + responses.add(responses.PUT, + 'http://zenodo-test.com/test-bucket/test-file.txt', + json={ + 'mimetype': 'application/octet-stream', + 'links': { + 'self': 'https://sandbox.zenodo.org/api/files/test-bucket/test-file.txt', + 'uploads': 'https://sandbox.zenodo.org/api/files/test-bucket/test-file.txt?uploads' + }, + 'key': 'test-file.txt', + 'size': 100 + }, + status=200) + + # fix because responses makes request to ES, and deposit.commit() won't work without it + responses.add_callback( + responses.PUT, + re.compile(r'http://localhost:9200/deposits-records-test-schema-v1\.0\.0/' + r'test-schema-v1\.0\.0/(.*)?version=(.*)&version_type=external_gte'), + callback=lambda req: (200, {}, json.dumps({})), + content_type='application/json', + ) + + # create the zenodo deposit + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'], + zenodo_data={ + 'title': 'test-title', + 'description': 'This is my first upload' + })), + headers=headers) + assert resp.status_code == 201 + + resolver = Resolver(pid_type='depid', + object_type='rec', + getter=lambda x: x) + _, uuid = resolver.resolve(pid) + record = CAPDeposit.get_record(uuid) + + assert len(record['_zenodo']) == 1 + assert record['_zenodo'][0]['id'] == 111 + assert record['_zenodo'][0]['title'] == 'test-title' + assert record['_zenodo'][0]['created'] == '2020-11-20T11:49:39.147767+00:00' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_create_deposit_with_wrong_data(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={ + 'status': 400, + 'message': 'Validation error.', + 'errors': [ + {'field': 'test', 'message': 'Unknown field name.'} + ]}, + status=400) + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'], + zenodo_data={'test': 'test'})), + headers=headers) + assert resp.status_code == 400 + assert resp.json['message'] == 'Validation error on creating the Zenodo deposit.' + assert resp.json['errors'] == [{'field': 'test', 'message': 'Unknown field name.'}] + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_zenodo_upload_authorization_failure(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={}, + status=401) + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 401 + assert resp.json['message'] == 'Authorization to Zenodo failed. Please reconnect.' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_zenodo_upload_deposit_not_created_error(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={}, + status=500) + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 400 + assert resp.json['message'] == 'Something went wrong, Zenodo deposit not created.' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_zenodo_upload_file_not_uploaded_error(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers, capsys): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={ + 'id': 111, + 'record_id': 111, + 'submitted': False, + 'title': '', + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [], + 'created': '2020-11-20T11:49:39.147767+00:00' + }, + status=200) + + responses.add(responses.PUT, + 'http://zenodo-test.com/test-bucket/test-file.txt', + json={}, + status=500) + + responses.add_callback( + responses.PUT, + re.compile(r'http://localhost:9200/deposits-records-test-schema-v1\.0\.0/' + r'test-schema-v1\.0\.0/(.*)?version=(.*)&version_type=external_gte'), + callback=lambda req: (200, {}, json.dumps({})), + content_type='application/json', + ) + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 201 + + captured = capsys.readouterr() + assert 'Uploading file test-file.txt to deposit 111 failed with 500' \ + in captured.err + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_zenodo_upload_empty_files(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + headers = auth_headers_for_user(user) + json_headers + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={ + 'id': 111, + 'record_id': 111, + 'submitted': False, + 'title': '', + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [] + }, + status=200) + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=[])), + headers=headers) + assert resp.status_code == 400 + assert resp.json['message'] == 'You cannot create an empty Zenodo deposit. Please add some files.' + + +@patch('cap.modules.deposit.api._fetch_token', return_value=None) +def test_zenodo_upload_no_token(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 400 + assert resp.json['message'] == 'Please connect your Zenodo account before creating a deposit.' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +def test_zenodo_upload_no_access(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['lhcb_user'] + headers = auth_headers_for_user(user) + json_headers + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 403 From 077f5ca93879ecaaa531e123c7a1389f475935ac Mon Sep 17 00:00:00 2001 From: Ilias Koutsakis Date: Fri, 20 Nov 2020 20:51:29 +0100 Subject: [PATCH 2/3] services: metadata input on zenodo deposit creation * adds serializers/validation for metadata input * adds unit tests for zenodo serializer * closes #1952 Signed-off-by: Ilias Koutsakis --- cap/modules/deposit/api.py | 37 ++-- cap/modules/deposit/errors.py | 15 ++ cap/modules/deposit/tasks.py | 16 +- cap/modules/deposit/utils.py | 25 +-- cap/modules/services/serializers/zenodo.py | 166 ++++++++++++++ tests/integration/test_zenodo_upload.py | 84 ++++++- tests/unit/schemas/test_zenodo_serializers.py | 208 ++++++++++++++++++ 7 files changed, 499 insertions(+), 52 deletions(-) create mode 100644 cap/modules/services/serializers/zenodo.py create mode 100644 tests/unit/schemas/test_zenodo_serializers.py diff --git a/cap/modules/deposit/api.py b/cap/modules/deposit/api.py index b366f5a76d..3477a9f1a9 100644 --- a/cap/modules/deposit/api.py +++ b/cap/modules/deposit/api.py @@ -61,6 +61,7 @@ from cap.modules.repos.tasks import download_repo, download_repo_file from cap.modules.repos.utils import (create_webhook, disconnect_subscriber, parse_git_url) +from cap.modules.services.serializers.zenodo import ZenodoUploadSchema from cap.modules.schemas.resolvers import (resolve_schema_by_url, schema_name_to_url) from cap.modules.user.errors import DoesNotExistInLDAP @@ -68,7 +69,7 @@ get_existing_or_register_user) from .errors import (DepositValidationError, UpdateDepositPermissionsError, - ReviewError) + ReviewError, InputValidationError) from .fetchers import cap_deposit_fetcher from .minters import cap_deposit_minter from .permissions import (AdminDepositPermission, CloneDepositPermission, @@ -257,7 +258,7 @@ def upload(self, pid, *args, **kwargs): with UpdateDepositPermission(self).require(403): if request: _, rec = request.view_args.get('pid_value').data - record_uuid = str(rec.id) + recid = str(rec.id) data = request.get_json() target = data.get('target') @@ -269,20 +270,26 @@ def upload(self, pid, *args, **kwargs): 'Please connect your Zenodo account ' 'before creating a deposit.') - files = data.get('files') - bucket = data.get('bucket') - zenodo_data = data.get('zenodo_data', {}) + files = data.get('files', []) + zenodo_data = data.get('zenodo_data') + input = dict(files=files, data=zenodo_data) \ + if zenodo_data else dict(files=files) - if files and bucket: - zenodo_deposit = create_zenodo_deposit(token, zenodo_data) # noqa - self.setdefault('_zenodo', []).append(zenodo_deposit) + if files: + _, errors = ZenodoUploadSchema(recid=recid).load(input) + if errors: + raise InputValidationError( + 'Validation error in Zenodo input data.', + errors=errors) + + deposit = create_zenodo_deposit(token, zenodo_data) + self.setdefault('_zenodo', []).append(deposit) self.commit() # upload files to zenodo deposit - upload_to_zenodo.delay( - files, bucket, token, - zenodo_deposit['id'], - zenodo_deposit['links']['bucket']) + upload_to_zenodo.delay(files, recid, token, + deposit['id'], + deposit['links']['bucket']) else: raise FileUploadError( 'You cannot create an empty Zenodo deposit. ' @@ -307,7 +314,7 @@ def upload(self, pid, *args, **kwargs): 'You cannot create a webhook on a file') download_repo_file( - record_uuid, + recid, f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa *api.get_file_download(filepath), api.auth_headers, @@ -325,10 +332,10 @@ def upload(self, pid, *args, **kwargs): 'You cannot create a push webhook' ' for a specific sha.') - create_webhook(record_uuid, api, event_type) + create_webhook(recid, api, event_type) else: download_repo.delay( - record_uuid, + recid, f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa api.get_repo_download(), api.auth_headers) diff --git a/cap/modules/deposit/errors.py b/cap/modules/deposit/errors.py index b1bdcc2e22..4dadefecf3 100644 --- a/cap/modules/deposit/errors.py +++ b/cap/modules/deposit/errors.py @@ -138,6 +138,21 @@ def __init__(self, description, errors=None, **kwargs): self.errors = [FieldError(e[0], e[1]) for e in errors.items()] +class InputValidationError(RESTValidationError): + """Review validation error exception.""" + + code = 400 + + description = "Validation error. Try again with valid data" + + def __init__(self, description, errors=None, **kwargs): + """Initialize exception.""" + super(InputValidationError, self).__init__(**kwargs) + + self.description = description or self.description + self.errors = [FieldError(e[0], e[1]) for e in errors.items()] + + class DataValidationError(RESTValidationError): """Review validation error exception.""" diff --git a/cap/modules/deposit/tasks.py b/cap/modules/deposit/tasks.py index 8c3f1e5201..954e2b8174 100644 --- a/cap/modules/deposit/tasks.py +++ b/cap/modules/deposit/tasks.py @@ -28,8 +28,7 @@ import requests from flask import current_app from celery import shared_task -from invenio_db import db -from invenio_files_rest.models import FileInstance, ObjectVersion +from invenio_files_rest.models import FileInstance @shared_task(autoretry_for=(Exception, ), @@ -37,20 +36,23 @@ 'max_retries': 5, 'countdown': 10 }) -def upload_to_zenodo(files, bucket, token, zenodo_depid, zenodo_bucket_url): +def upload_to_zenodo(files, recid, token, zenodo_depid, zenodo_bucket_url): """Upload to Zenodo the files the user selected.""" + from cap.modules.deposit.api import CAPDeposit + rec = CAPDeposit.get_record(recid) + for filename in files: - file_obj = ObjectVersion.get(bucket, filename) + file_obj = rec.files[filename] file_ins = FileInstance.get(file_obj.file_id) with open(file_ins.uri, 'rb') as fp: - file = requests.put( + resp = requests.put( url=f'{zenodo_bucket_url}/{filename}', data=fp, params=dict(access_token=token), ) - if not file.ok: + if not resp.ok: current_app.logger.error( f'Uploading file {filename} to deposit {zenodo_depid} ' - f'failed with {file.status_code}.') + f'failed with {resp.status_code}.') diff --git a/cap/modules/deposit/utils.py b/cap/modules/deposit/utils.py index 5e5aa88c49..02c34cc2c7 100644 --- a/cap/modules/deposit/utils.py +++ b/cap/modules/deposit/utils.py @@ -27,13 +27,13 @@ import requests from flask import current_app -from flask_login import current_user from invenio_access.models import Role from invenio_db import db from cap.modules.deposit.errors import AuthorizationError, \ DataValidationError, FileUploadError from cap.modules.records.utils import url_to_api_url +from cap.modules.services.serializers.zenodo import ZenodoDepositSchema def clean_empty_values(data): @@ -82,13 +82,15 @@ def add_api_to_links(links): return response -def create_zenodo_deposit(token, data): +def create_zenodo_deposit(token, data=None): """Create a Zenodo deposit using the logged in user's credentials.""" zenodo_url = current_app.config.get("ZENODO_SERVER_URL") + zenodo_data = {'metadata': data} if data else {} + deposit = requests.post( url=f'{zenodo_url}/deposit/depositions', params=dict(access_token=token), - json={'metadata': data}, + json=zenodo_data, headers={'Content-Type': 'application/json'} ) @@ -105,18 +107,5 @@ def create_zenodo_deposit(token, data): raise FileUploadError( 'Something went wrong, Zenodo deposit not created.') - # TODO: fix with serializers - data = deposit.json() - zenodo_deposit = { - 'id': data['id'], - 'title': data.get('metadata', {}).get('title'), - 'creator': current_user.id, - 'created': data['created'], - 'links': { - 'self': data['links']['self'], - 'bucket': data['links']['bucket'], - 'html': data['links']['html'], - 'publish': data['links']['publish'], - } - } - return zenodo_deposit + data = ZenodoDepositSchema().dump(deposit.json()).data + return data diff --git a/cap/modules/services/serializers/zenodo.py b/cap/modules/services/serializers/zenodo.py new file mode 100644 index 0000000000..39e91e1f82 --- /dev/null +++ b/cap/modules/services/serializers/zenodo.py @@ -0,0 +1,166 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Analysis Preservation Framework. +# Copyright (C) 2020 CERN. +# +# CERN Analysis Preservation Framework is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CERN Analysis Preservation Framework is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CERN Analysis Preservation Framework; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +# or submit itself to any jurisdiction. + +"""Zenodo Serializer/Validator.""" + +import arrow +from flask_login import current_user +from marshmallow import Schema, fields, ValidationError, validate, validates, \ + validates_schema + +DATE_REGEX = r'\d{4}-\d{2}-\d{2}' +DATE_ERROR = 'The date should follow the pattern YYYY-mm-dd.' + +UPLOAD_TYPES = [ + 'publication', + 'poster', + 'presentation', + 'dataset', + 'image', + 'video', + 'software', + 'lesson', + 'physicalobject', + 'other' +] +LICENSES = [ + 'CC-BY-4.0', + 'CC-BY-1.0', + 'CC-BY-2.0', + 'CC-BY-3.0' +] +ACCESS_RIGHTS = [ + 'open', + 'embargoed', + 'restricted', + 'closed' +] + + +def choice_error_msg(choices): + return f'Not a valid choice. Select one of: {choices}' + + +class ZenodoCreatorsSchema(Schema): + name = fields.String(required=True) + affiliation = fields.String() + orcid = fields.String() + + +class ZenodoDepositMetadataSchema(Schema): + title = fields.String(required=True) + description = fields.String(required=True) + version = fields.String() + + keywords = fields.List(fields.String()) + creators = fields.List( + fields.Nested(ZenodoCreatorsSchema), required=True) + + upload_type = fields.String(required=True, validate=validate.OneOf( + UPLOAD_TYPES, error=choice_error_msg(UPLOAD_TYPES))) + license = fields.String(required=True, validate=validate.OneOf( + LICENSES, error=choice_error_msg(LICENSES))) + access_right = fields.String(required=True, validate=validate.OneOf( + ACCESS_RIGHTS, error=choice_error_msg(ACCESS_RIGHTS))) + + publication_date = fields.String( + required=True, validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR)) + embargo_date = fields.String( + validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR)) + access_conditions = fields.String() + + @validates('embargo_date') + def validate_embargo_date(self, value): + """Validate that embargo date is in the future.""" + if arrow.get(value).date() <= arrow.utcnow().date(): + raise ValidationError( + 'Embargo date must be in the future.', + field_names=['embargo_date'] + ) + + @validates_schema() + def validate_license(self, data, **kwargs): + """Validate license according to what Zenodo expects.""" + access = data.get('access_right') + if access in ['open', 'embargoed'] and 'license' not in data: + raise ValidationError( + 'Required when access right is open or embargoed.', + field_names=['license'] + ) + if access == 'embargoed' and 'embargo_date' not in data: + raise ValidationError( + 'Required when access right is embargoed.', + field_names=['embargo_date'] + ) + if access == 'restricted' and 'access_conditions' not in data: + raise ValidationError( + 'Required when access right is restricted.', + field_names=['access_conditions'] + ) + + +class ZenodoUploadSchema(Schema): + files = fields.List(fields.String(), required=True) + data = fields.Nested(ZenodoDepositMetadataSchema, default=dict()) + + def __init__(self, *args, **kwargs): + self.recid = kwargs.pop('recid') if 'recid' in kwargs else None + super(Schema, self).__init__(*args, **kwargs) + + @validates_schema() + def validate_files(self, data, **kwargs): + """Check if the files exist in this deposit.""" + from cap.modules.deposit.api import CAPDeposit + rec = CAPDeposit.get_record(self.recid) + + for _file in data['files']: + if _file not in rec.files.keys: + raise ValidationError( + f'File `{_file}` not found in record.', + field_names=['files'] + ) + + +class ZenodoDepositSchema(Schema): + id = fields.Int(dump_only=True) + created = fields.String(dump_only=True) + + title = fields.Method('get_title', dump_only=True, allow_none=True) + creator = fields.Method('get_creator', dump_only=True, allow_none=True) + links = fields.Method('get_links', dump_only=True) + + def get_creator(self, data): + return current_user.id if current_user else None + + def get_title(self, data): + return data.get('metadata', {}).get('title') + + def get_links(self, data): + return { + 'self': data['links']['self'], + 'bucket': data['links']['bucket'], + 'html': data['links']['html'], + 'publish': data['links']['publish'] + } diff --git a/tests/integration/test_zenodo_upload.py b/tests/integration/test_zenodo_upload.py index 2591c38ca6..4da17e0fb1 100644 --- a/tests/integration/test_zenodo_upload.py +++ b/tests/integration/test_zenodo_upload.py @@ -173,7 +173,15 @@ def test_create_and_upload_to_zenodo_with_data(mock_token, app, users, deposit_w files=['test-file.txt'], zenodo_data={ 'title': 'test-title', - 'description': 'This is my first upload' + 'description': 'This is my first upload', + 'upload_type': 'poster', + 'creators': [ + {'name': 'User Tester', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2050-09-09' })), headers=headers) assert resp.status_code == 201 @@ -191,7 +199,6 @@ def test_create_and_upload_to_zenodo_with_data(mock_token, app, users, deposit_w @patch('cap.modules.deposit.api._fetch_token', return_value='test-token') -@responses.activate def test_create_deposit_with_wrong_data(mock_token, app, users, deposit_with_file, auth_headers_for_user, json_headers): user = users['cms_user'] @@ -200,26 +207,79 @@ def test_create_deposit_with_wrong_data(mock_token, app, users, deposit_with_fil pid = deposit_with_file['_deposit']['id'] bucket = deposit_with_file.files.bucket + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt', 'not-found.txt'], + zenodo_data={ + 'description': 'This is my first upload', + 'creators': [ + {'name': 'User Tester', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20' + })), + headers=headers) + assert resp.status_code == 400 + assert resp.json['message'] == 'Validation error in Zenodo input data.' + assert resp.json['errors'] == [{ + 'field': 'data', + 'message': { + 'upload_type': ['Missing data for required field.'], + 'title': ['Missing data for required field.']} + }, { + 'field': 'files', + 'message': ['File `not-found.txt` not found in record.'] + }] + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_create_and_upload_to_zenodo_with_wrong_files(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + # MOCK RESPONSES FROM ZENODO SERVER + # first the deposit creation responses.add(responses.POST, f'{zenodo_server_url}/deposit/depositions', json={ - 'status': 400, - 'message': 'Validation error.', - 'errors': [ - {'field': 'test', 'message': 'Unknown field name.'} - ]}, - status=400) + 'id': 111, + 'record_id': 111, + 'title': '', + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [], + 'created': '2020-11-20T11:49:39.147767+00:00' + }, + status=200) + # create the zenodo deposit with app.test_client() as client: resp = client.post(f'/deposits/{pid}/actions/upload', data=json.dumps(dict(target='zenodo', bucket=str(bucket), - files=['test-file.txt'], - zenodo_data={'test': 'test'})), + files=['test-file.txt', 'not-exists.txt'])), headers=headers) + assert resp.status_code == 400 - assert resp.json['message'] == 'Validation error on creating the Zenodo deposit.' - assert resp.json['errors'] == [{'field': 'test', 'message': 'Unknown field name.'}] + assert resp.json['message'] == 'Validation error in Zenodo input data.' + assert resp.json['errors'] == [{ + 'field': 'files', + 'message': ['File `not-exists.txt` not found in record.'] + }] + + @patch('cap.modules.deposit.api._fetch_token', return_value='test-token') diff --git a/tests/unit/schemas/test_zenodo_serializers.py b/tests/unit/schemas/test_zenodo_serializers.py new file mode 100644 index 0000000000..14235f706f --- /dev/null +++ b/tests/unit/schemas/test_zenodo_serializers.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Analysis Preservation Framework. +# Copyright (C) 2020 CERN. +# +# CERN Analysis Preservation Framework is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CERN Analysis Preservation Framework is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CERN Analysis Preservation Framework; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +# or submit itself to any jurisdiction. + +"""Zenodo upload serializers.""" + +from cap.modules.services.serializers.zenodo import ZenodoUploadSchema, ZenodoDepositSchema + + +def test_zenodo_upload_serializer(app, deposit_with_file): + recid = str(deposit_with_file.id) + + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt'] + } + payload, errors = ZenodoUploadSchema(recid=recid).load(data) + assert errors == {} + assert payload == data + + # not existing files + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt', 'no-file.txt'], + } + payload, errors = ZenodoUploadSchema(recid=recid).load(data) + assert errors == {'files': ['File `no-file.txt` not found in record.']} + + # missing required fields + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt'] + } + payload, errors = ZenodoUploadSchema(recid=recid).load(data) + assert errors == { + 'data': { + 'upload_type': ['Missing data for required field.'], + 'description': ['Missing data for required field.']} + } + + # embargo date in the past + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2015-09-09' + }, + 'files': ['test-file.txt'] + } + payload, errors = ZenodoUploadSchema(recid=recid).load(data) + assert errors == { + 'data': { + 'embargo_date': ['Embargo date must be in the future.'] + }} + + # malformed dates + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11', + 'embargo_date': '2015-01' + }, + 'files': ['test-file.txt'] + } + payload, errors = ZenodoUploadSchema(recid=recid).load(data) + assert errors == { + 'data': { + 'publication_date': ['The date should follow the pattern YYYY-mm-dd.'], + 'embargo_date': ['The date should follow the pattern YYYY-mm-dd.'] + }} + + # wrong enum in license/upload/access + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'test', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'test', + 'license': 'test', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt'] + } + payload, errors = ZenodoUploadSchema(recid=recid).load(data) + assert errors == { + 'data': { + 'license': ["Not a valid choice. Select one of: ['CC-BY-4.0', 'CC-BY-1.0', 'CC-BY-2.0', 'CC-BY-3.0']"], + 'access_right': ["Not a valid choice. Select one of: ['open', 'embargoed', 'restricted', 'closed']"], + 'upload_type': ["Not a valid choice. Select one of: ['publication', 'poster', " + "'presentation', 'dataset', 'image', 'video', 'software', " + "'lesson', 'physicalobject', 'other']"] + } + } + + # access conditional + data = { + 'data': { + 'title': 'My first upload yoohoo', + 'upload_type': 'poster', + 'description': 'This is my first upload', + 'creators': [ + {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'restricted', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2030-09-09' + }, + 'files': ['test-file.txt'] + } + payload, errors = ZenodoUploadSchema(recid=recid).load(data) + assert errors == { + 'data': { + 'access_conditions': ['Required when access right is restricted.'] + }} + + +def test_zenodo_deposit_serializer(): + payload = { + 'id': 111, + 'record_id': 111, + 'metadata': { + 'title': 'test' + }, + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [], + 'created': '2020-11-20T11:49:39.147767+00:00' + } + + data = ZenodoDepositSchema().dump(payload).data + assert data['id'] == 111 + assert data['title'] == 'test' + assert data['creator'] is None From 6c29103b91d6866117c46d9a735f6b0028f3d1e5 Mon Sep 17 00:00:00 2001 From: Ilias Koutsakis Date: Wed, 25 Nov 2020 17:00:02 +0100 Subject: [PATCH 3/3] services: add handling of async results of upload * adds the task results to the record * addresses #1970 Signed-off-by: Ilias Koutsakis --- cap/modules/deposit/api.py | 8 +-- cap/modules/deposit/tasks.py | 84 ++++++++++++++++++++---- cap/modules/deposit/utils.py | 14 ++++ cap/modules/fixtures/schemas/zenodo.json | 69 +++++++++++++++++++ cap/modules/services/views/zenodo.py | 18 +++++ tests/integration/test_zenodo_upload.py | 5 +- 6 files changed, 178 insertions(+), 20 deletions(-) create mode 100644 cap/modules/fixtures/schemas/zenodo.json diff --git a/cap/modules/deposit/api.py b/cap/modules/deposit/api.py index 3477a9f1a9..af2fd55ad9 100644 --- a/cap/modules/deposit/api.py +++ b/cap/modules/deposit/api.py @@ -78,7 +78,7 @@ UpdateDepositPermission) from .review import Reviewable -from .tasks import upload_to_zenodo +from .tasks import create_zenodo_upload_tasks from .utils import create_zenodo_deposit _datastore = LocalProxy(lambda: current_app.extensions['security'].datastore) @@ -287,9 +287,9 @@ def upload(self, pid, *args, **kwargs): self.commit() # upload files to zenodo deposit - upload_to_zenodo.delay(files, recid, token, - deposit['id'], - deposit['links']['bucket']) + create_zenodo_upload_tasks(files, recid, token, + deposit['id'], + deposit['links']['bucket']) else: raise FileUploadError( 'You cannot create an empty Zenodo deposit. ' diff --git a/cap/modules/deposit/tasks.py b/cap/modules/deposit/tasks.py index 954e2b8174..faf43f4a97 100644 --- a/cap/modules/deposit/tasks.py +++ b/cap/modules/deposit/tasks.py @@ -21,14 +21,36 @@ # In applying this license, CERN does not # waive the privileges and immunities granted to it by virtue of its status # as an Intergovernmental Organization or submit itself to any jurisdiction. -"""Tasks.""" +"""Zenodo Upload Tasks.""" from __future__ import absolute_import, print_function import requests from flask import current_app -from celery import shared_task +from celery import chord, group, current_task, shared_task + from invenio_files_rest.models import FileInstance +from invenio_db import db + +from cap.modules.experiments.errors import ExternalAPIException +from .utils import get_zenodo_deposit_from_record + + +def create_zenodo_upload_tasks(files, recid, token, + zenodo_depid, zenodo_bucket_url): + """Create the upload tasks and get the results.""" + current_app.logger.info( + f'Uploading files to Zenodo {zenodo_depid}: {files}.') + + # the only way to have a task that waits for + # other tasks to finish is the `chord` structure + upload_callback = save_results_to_record.s(depid=zenodo_depid, recid=recid) + upload_tasks = group( + upload.s(filename, recid, token, zenodo_bucket_url) + for filename in files + ) + + chord(upload_tasks, upload_callback).delay() @shared_task(autoretry_for=(Exception, ), @@ -36,23 +58,59 @@ 'max_retries': 5, 'countdown': 10 }) -def upload_to_zenodo(files, recid, token, zenodo_depid, zenodo_bucket_url): - """Upload to Zenodo the files the user selected.""" +def upload(filename, recid, token, zenodo_bucket_url): + """Upload file to Zenodo.""" from cap.modules.deposit.api import CAPDeposit - rec = CAPDeposit.get_record(recid) + record = CAPDeposit.get_record(recid) - for filename in files: - file_obj = rec.files[filename] - file_ins = FileInstance.get(file_obj.file_id) + file_obj = record.files[filename] + file_ins = FileInstance.get(file_obj.file_id) + task_id = current_task.request.id - with open(file_ins.uri, 'rb') as fp: + with open(file_ins.uri, 'rb') as fp: + try: resp = requests.put( url=f'{zenodo_bucket_url}/{filename}', data=fp, params=dict(access_token=token), ) - if not resp.ok: - current_app.logger.error( - f'Uploading file {filename} to deposit {zenodo_depid} ' - f'failed with {resp.status_code}.') + current_app.logger.error( + f'{task_id}: Zenodo upload of file `{filename}`: {resp.status_code}.') # noqa + + status = resp.status_code + msg = resp.json() + except (ValueError, ExternalAPIException) as err: + status = 'FAILED' + msg = str(err) + + current_app.logger.error( + f'{task_id}: Something went wrong with the task:\n{msg}') + finally: + return { + 'task_id': task_id, + 'result': {'file': filename, 'status': status, 'message': msg} + } + + +@shared_task(autoretry_for=(Exception, ), + retry_kwargs={ + 'max_retries': 5, + 'countdown': 10 + }) +def save_results_to_record(tasks, depid, recid): + """Save the results of uploading to the record.""" + from cap.modules.deposit.api import CAPDeposit + record = CAPDeposit.get_record(recid) + + # update the tasks of the specified zenodo deposit (filename: status) + # this way we can attach multiple deposits + zenodo = get_zenodo_deposit_from_record(record, depid) + for task in tasks: + zenodo['tasks'][task['task_id']] = task['result'] + + record.commit() + db.session.commit() + + current_app.logger.info( + f'COMPLETED: Zenodo {depid} uploads:\n{tasks}') diff --git a/cap/modules/deposit/utils.py b/cap/modules/deposit/utils.py index 02c34cc2c7..ab57a6d7ca 100644 --- a/cap/modules/deposit/utils.py +++ b/cap/modules/deposit/utils.py @@ -82,6 +82,20 @@ def add_api_to_links(links): return response +def get_zenodo_deposit_from_record(record, pid): + """Get the related Zenodo information from a record.""" + try: + index = [idx for idx, deposit in enumerate(record['_zenodo']) + if deposit['id'] == pid][0] + + # set an empty dict as tasks if there is none + record['_zenodo'][index].setdefault('tasks', {}) + return record['_zenodo'][index] + except IndexError: + raise FileUploadError( + 'The Zenodo pid you provided is not associated with this record.') + + def create_zenodo_deposit(token, data=None): """Create a Zenodo deposit using the logged in user's credentials.""" zenodo_url = current_app.config.get("ZENODO_SERVER_URL") diff --git a/cap/modules/fixtures/schemas/zenodo.json b/cap/modules/fixtures/schemas/zenodo.json new file mode 100644 index 0000000000..d127a12f54 --- /dev/null +++ b/cap/modules/fixtures/schemas/zenodo.json @@ -0,0 +1,69 @@ +{ + "name": "zenodo", + "version": "0.0.1", + "fullname": "", + "experiment": null, + "is_indexed": false, + "use_deposit_as_record": true, + "allow_all": true, + "deposit_schema": { + "additionalProperties": "False", + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "number" + }, + "created": { + "type": "string" + }, + "title": { + "type": "string" + }, + "creator": { + "type": "string" + }, + "links": { + "type": "object", + "properties": { + "self": { + "type": "string" + }, + "html": { + "type": "string" + }, + "publish": { + "type": "string" + }, + "bucket": { + "type": "string" + } + } + }, + "tasks": { + "type": "object", + "patternProperties": { + "^[0-F]{8}-([0-F]{4}-){3}[0-F]{12}$": { + "type": "object", + "properties": { + "file": { + "type": "string" + }, + "status": { + "type": "string" + }, + "message": { + "type": "string" + } + } + } + } + } + }, + "title": "Zenodo Deposit" + } + }, + "deposit_mapping": {}, + "deposit_options": {} +} diff --git a/cap/modules/services/views/zenodo.py b/cap/modules/services/views/zenodo.py index 7830cc957b..e26bec5908 100644 --- a/cap/modules/services/views/zenodo.py +++ b/cap/modules/services/views/zenodo.py @@ -27,8 +27,11 @@ import requests from flask import current_app, jsonify +from invenio_pidstore.resolver import Resolver from . import blueprint +from cap.modules.access.utils import login_required +from cap.modules.deposit.api import CAPDeposit def _get_zenodo_record(zenodo_id): @@ -47,3 +50,18 @@ def get_zenodo_record(zenodo_id): """Get record from zenodo (route).""" resp, status = _get_zenodo_record(zenodo_id) return jsonify(resp), status + + +@blueprint.route('/zenodo/tasks/') +@login_required +def get_zenodo_tasks(depid): + """Get record from zenodo (route).""" + resolver = Resolver(pid_type='depid', + object_type='rec', + getter=lambda x: x) + + _, uuid = resolver.resolve(depid) + record = CAPDeposit.get_record(uuid) + tasks = record.get('_zenodo', {}).get('tasks', []) + + return jsonify(tasks), 200 diff --git a/tests/integration/test_zenodo_upload.py b/tests/integration/test_zenodo_upload.py index 4da17e0fb1..92ae21c517 100644 --- a/tests/integration/test_zenodo_upload.py +++ b/tests/integration/test_zenodo_upload.py @@ -104,7 +104,7 @@ def test_create_and_upload_to_zenodo(mock_token, app, users, deposit_with_file, assert len(record['_zenodo']) == 1 assert record['_zenodo'][0]['id'] == 111 - assert record['_zenodo'][0]['title'] == None + assert record['_zenodo'][0]['title'] is None assert record['_zenodo'][0]['created'] == '2020-11-20T11:49:39.147767+00:00' @@ -382,8 +382,7 @@ def test_zenodo_upload_file_not_uploaded_error(mock_token, app, users, deposit_w assert resp.status_code == 201 captured = capsys.readouterr() - assert 'Uploading file test-file.txt to deposit 111 failed with 500' \ - in captured.err + assert 'Zenodo upload of file `test-file.txt`: 500.' in captured.err @patch('cap.modules.deposit.api._fetch_token', return_value='test-token')