From d2c890bc32872be880c8c1960b9ddbf017a1033c Mon Sep 17 00:00:00 2001 From: Jan Range Date: Fri, 19 Jul 2024 17:18:00 +0200 Subject: [PATCH 1/6] fix jsonData passed to false kwarg --- pyDataverse/api.py | 42 ++++++++++++++++++++++++++++++++++++---- tests/api/test_upload.py | 33 +++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/pyDataverse/api.py b/pyDataverse/api.py index efd3186..23b0493 100644 --- a/pyDataverse/api.py +++ b/pyDataverse/api.py @@ -167,24 +167,28 @@ def post_request(self, url, data=None, auth=False, params=None, files=None): if self.api_token: params["key"] = self.api_token + if isinstance(data, str): data = json.loads(data) + # Decide whether to use 'data' or 'json' args + request_params = self._check_json_data_form(data) + if self.client is None: return self._sync_request( method=httpx.post, url=url, - json=data, params=params, files=files, + **request_params ) else: return self._async_request( method=self.client.post, url=url, - json=data, params=params, files=files, + **request_params, ) def put_request(self, url, data=None, auth=False, params=None): @@ -216,19 +220,22 @@ def put_request(self, url, data=None, auth=False, params=None): if isinstance(data, str): data = json.loads(data) + # Decide whether to use 'data' or 'json' args + request_params = self._check_json_data_form(data) + if self.client is None: return self._sync_request( method=httpx.put, url=url, - json=data, params=params, + **request_params, ) else: return self._async_request( method=self.client.put, url=url, - json=data, params=params, + **request_params, ) def delete_request(self, url, auth=False, params=None): @@ -268,6 +275,33 @@ def delete_request(self, url, auth=False, params=None): params=params, ) + @staticmethod + def _check_json_data_form(data: Optional[Dict]): + """This method checks and distributes given payload to match Dataverse expectations. + + In the case of the form-data keyed by "jsonData", Dataverse expects + the payload as a string in a form of a dictionary. This is not possible + using HTTPXs json parameter, so we need to handle this case separately. + """ + + if not data: + return {} + elif not isinstance(data, dict): + raise ValueError("Data must be a dictionary.") + elif "jsonData" not in data: + return {"json": data} + + assert list(data.keys()) == ["jsonData"], ( + "jsonData must be the only key in the dictionary." + ) + + # Content of JSON data should ideally be a string + content = data["jsonData"] + if not isinstance(content, str): + data["jsonData"] = json.dumps(content) + + return {"data": data} + def _sync_request( self, method, diff --git a/tests/api/test_upload.py b/tests/api/test_upload.py index 34af3a4..4b8f5f2 100644 --- a/tests/api/test_upload.py +++ b/tests/api/test_upload.py @@ -141,6 +141,7 @@ def test_file_replacement(self): "description": "My description.", "categories": ["Data"], "forceReplace": False, + "directoryLabel": "some/other", } response = api.replace_datafile( @@ -152,12 +153,17 @@ def test_file_replacement(self): # Assert replaced_id = self._get_file_id(BASE_URL, API_TOKEN, pid) + file_metadata = self._get_file_metadata(BASE_URL, API_TOKEN, replaced_id) + data_file = file_metadata["dataFile"] replaced_content = self._fetch_datafile_content( BASE_URL, API_TOKEN, replaced_id, ) + assert data_file["description"] == "My description.", "Description does not match." + assert data_file["categories"] == ["Data"], "Categories do not match." + assert file_metadata["directoryLabel"] == "some/other", "Directory label does not match." assert response.status_code == 200, "File replacement failed." assert ( replaced_content == mutated @@ -246,3 +252,30 @@ def _fetch_datafile_content( response.raise_for_status() return response.content.decode("utf-8") + + + @staticmethod + def _get_file_metadata( + BASE_URL: str, + API_TOKEN: str, + id: str, + ): + """ + Retrieves the metadata for a file in Dataverse. + + Args: + BASE_URL (str): The base URL of the Dataverse instance. + API_TOKEN (str): The API token for authentication. + id (str): The ID of the file. + + Returns: + dict: The metadata for the file. + """ + response = httpx.get( + url=f"{BASE_URL}/api/files/{id}", + headers={"X-Dataverse-key": API_TOKEN}, + ) + + response.raise_for_status() + + return response.json()["data"] From eaead2389d06a584e1a06bf730b1f7a7a407037e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20H=C3=B6ffner?= Date: Wed, 24 Jul 2024 22:23:45 +0200 Subject: [PATCH 2/6] Fix handling of empty metadata Relates to datalad/datalad-dataverse#320 --- pyDataverse/api.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pyDataverse/api.py b/pyDataverse/api.py index 23b0493..e45b18e 100644 --- a/pyDataverse/api.py +++ b/pyDataverse/api.py @@ -1847,9 +1847,10 @@ def upload_datafile(self, identifier, filename, json_str=None, is_pid=True): url += "/datasets/{0}/add".format(identifier) files = {"file": open(filename, "rb")} - return self.post_request( - url, data={"jsonData": json_str}, files=files, auth=True - ) + metadata = {} + if json_str is not None: + metadata["jsonData"] = json_str + return self.post_request(url, data=metadata, files=files, auth=True) def update_datafile_metadata(self, identifier, json_str=None, is_filepid=False): """Update datafile metadata. From d08892fd92d5fe186c31dea597a5e1ea8f264deb Mon Sep 17 00:00:00 2001 From: Jan Range Date: Tue, 30 Jul 2024 10:43:16 +0200 Subject: [PATCH 3/6] ignore JetBrains `.idea` --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 2da1979..2630f63 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,6 @@ poetry.lock # Ruff .ruff_cache/ + +# JetBrains +.idea/ From 8e0a521f071525a7b030e32c355ab7f1d3f0c7f8 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Tue, 30 Jul 2024 10:44:08 +0200 Subject: [PATCH 4/6] ruff reformatting --- pyDataverse/api.py | 13 ++++--------- tests/api/test_async_api.py | 2 -- tests/models/test_datafile.py | 20 +++++++++++++++---- tests/models/test_dataset.py | 31 +++++++++++++++++++++++------- tests/models/test_dataverse.py | 35 +++++++++++++++++++++++++++------- tests/models/test_dvobject.py | 1 + 6 files changed, 73 insertions(+), 29 deletions(-) diff --git a/pyDataverse/api.py b/pyDataverse/api.py index e45b18e..f7a4a21 100644 --- a/pyDataverse/api.py +++ b/pyDataverse/api.py @@ -167,7 +167,6 @@ def post_request(self, url, data=None, auth=False, params=None, files=None): if self.api_token: params["key"] = self.api_token - if isinstance(data, str): data = json.loads(data) @@ -176,11 +175,7 @@ def post_request(self, url, data=None, auth=False, params=None, files=None): if self.client is None: return self._sync_request( - method=httpx.post, - url=url, - params=params, - files=files, - **request_params + method=httpx.post, url=url, params=params, files=files, **request_params ) else: return self._async_request( @@ -291,9 +286,9 @@ def _check_json_data_form(data: Optional[Dict]): elif "jsonData" not in data: return {"json": data} - assert list(data.keys()) == ["jsonData"], ( - "jsonData must be the only key in the dictionary." - ) + assert list(data.keys()) == [ + "jsonData" + ], "jsonData must be the only key in the dictionary." # Content of JSON data should ideally be a string content = data["jsonData"] diff --git a/tests/api/test_async_api.py b/tests/api/test_async_api.py index 61b1910..12c4e3a 100644 --- a/tests/api/test_async_api.py +++ b/tests/api/test_async_api.py @@ -3,10 +3,8 @@ class TestAsyncAPI: - @pytest.mark.asyncio async def test_async_api(self, native_api): - async with native_api: tasks = [native_api.get_info_version() for _ in range(10)] responses = await asyncio.gather(*tasks) diff --git a/tests/models/test_datafile.py b/tests/models/test_datafile.py index 55793da..7fec1c5 100644 --- a/tests/models/test_datafile.py +++ b/tests/models/test_datafile.py @@ -1,4 +1,5 @@ """Datafile data model tests.""" + import json import jsonschema import os @@ -244,7 +245,10 @@ def test_datafile_from_json_valid(self): ), (({json_upload_min()}, {"validate": False}), object_data_min()), ( - ({json_upload_min()}, {"filename_schema": "wrong", "validate": False},), + ( + {json_upload_min()}, + {"filename_schema": "wrong", "validate": False}, + ), object_data_min(), ), ( @@ -345,7 +349,10 @@ def test_datafile_to_json_valid(self): json.loads(json_upload_min()), ), ( - (dict_flat_set_min(), {"filename_schema": "wrong", "validate": False},), + ( + dict_flat_set_min(), + {"filename_schema": "wrong", "validate": False}, + ), json.loads(json_upload_min()), ), ( @@ -517,7 +524,10 @@ def test_dataverse_from_json_to_json_valid(self): ({json_upload_full()}, {}), ({json_upload_min()}, {"data_format": "dataverse_upload"}), ({json_upload_min()}, {"validate": False}), - ({json_upload_min()}, {"filename_schema": "wrong", "validate": False},), + ( + {json_upload_min()}, + {"filename_schema": "wrong", "validate": False}, + ), ( {json_upload_min()}, { @@ -550,4 +560,6 @@ def test_dataverse_from_json_to_json_valid(self): for key, val in pdv_end.get().items(): assert getattr(pdv_start, key) == getattr(pdv_end, key) - assert len(pdv_start.__dict__) == len(pdv_end.__dict__,) + assert len(pdv_start.__dict__) == len( + pdv_end.__dict__, + ) diff --git a/tests/models/test_dataset.py b/tests/models/test_dataset.py index fb01d0f..601bb15 100644 --- a/tests/models/test_dataset.py +++ b/tests/models/test_dataset.py @@ -1,4 +1,5 @@ """Dataset data model tests.""" + import json import os import platform @@ -950,11 +951,17 @@ def test_dataset_from_json_valid(self): ), (({json_upload_min()}, {"validate": False}), object_data_min()), ( - ({json_upload_min()}, {"filename_schema": "", "validate": False},), + ( + {json_upload_min()}, + {"filename_schema": "", "validate": False}, + ), object_data_min(), ), ( - ({json_upload_min()}, {"filename_schema": "wrong", "validate": False},), + ( + {json_upload_min()}, + {"filename_schema": "wrong", "validate": False}, + ), object_data_min(), ), ( @@ -995,11 +1002,17 @@ def test_dataset_to_json_valid(self): json.loads(json_upload_min()), ), ( - (dict_flat_set_min(), {"filename_schema": "", "validate": False},), + ( + dict_flat_set_min(), + {"filename_schema": "", "validate": False}, + ), json.loads(json_upload_min()), ), ( - (dict_flat_set_min(), {"filename_schema": "wrong", "validate": False},), + ( + dict_flat_set_min(), + {"filename_schema": "wrong", "validate": False}, + ), json.loads(json_upload_min()), ), ( @@ -1167,7 +1180,10 @@ def test_dataset_to_json_from_json_valid(self): (dict_flat_set_full(), {}), (dict_flat_set_min(), {"data_format": "dataverse_upload"}), (dict_flat_set_min(), {"validate": False}), - (dict_flat_set_min(), {"filename_schema": "wrong", "validate": False},), + ( + dict_flat_set_min(), + {"filename_schema": "wrong", "validate": False}, + ), ( dict_flat_set_min(), { @@ -1180,7 +1196,6 @@ def test_dataset_to_json_from_json_valid(self): ] for data_set, kwargs_from in data: - kwargs = {} pdv_start = data_object() pdv_start.set(data_set) @@ -1200,4 +1215,6 @@ def test_dataset_to_json_from_json_valid(self): for key, val in pdv_end.get().items(): assert getattr(pdv_start, key) == getattr(pdv_end, key) - assert len(pdv_start.__dict__) == len(pdv_end.__dict__,) + assert len(pdv_start.__dict__) == len( + pdv_end.__dict__, + ) diff --git a/tests/models/test_dataverse.py b/tests/models/test_dataverse.py index a4c362e..946fdc6 100644 --- a/tests/models/test_dataverse.py +++ b/tests/models/test_dataverse.py @@ -1,4 +1,5 @@ """Dataverse data model tests.""" + import json import jsonschema import os @@ -296,11 +297,17 @@ def test_dataverse_from_json_valid(self): ), (({json_upload_min()}, {"validate": False}), object_data_min()), ( - ({json_upload_min()}, {"filename_schema": "", "validate": False},), + ( + {json_upload_min()}, + {"filename_schema": "", "validate": False}, + ), object_data_min(), ), ( - ({json_upload_min()}, {"filename_schema": "wrong", "validate": False},), + ( + {json_upload_min()}, + {"filename_schema": "wrong", "validate": False}, + ), object_data_min(), ), ( @@ -401,11 +408,17 @@ def test_dataverse_to_json_valid(self): json.loads(json_upload_min()), ), ( - (dict_flat_set_min(), {"filename_schema": "", "validate": False},), + ( + dict_flat_set_min(), + {"filename_schema": "", "validate": False}, + ), json.loads(json_upload_min()), ), ( - (dict_flat_set_min(), {"filename_schema": "wrong", "validate": False},), + ( + dict_flat_set_min(), + {"filename_schema": "wrong", "validate": False}, + ), json.loads(json_upload_min()), ), ( @@ -583,8 +596,14 @@ def test_dataverse_from_json_to_json_valid(self): ({json_upload_full()}, {}), ({json_upload_min()}, {"data_format": "dataverse_upload"}), ({json_upload_min()}, {"validate": False}), - ({json_upload_min()}, {"filename_schema": "", "validate": False},), - ({json_upload_min()}, {"filename_schema": "wrong", "validate": False},), + ( + {json_upload_min()}, + {"filename_schema": "", "validate": False}, + ), + ( + {json_upload_min()}, + {"filename_schema": "wrong", "validate": False}, + ), ( {json_upload_min()}, { @@ -614,4 +633,6 @@ def test_dataverse_from_json_to_json_valid(self): for key, val in pdv_end.get().items(): assert getattr(pdv_start, key) == getattr(pdv_end, key) - assert len(pdv_start.__dict__) == len(pdv_end.__dict__,) + assert len(pdv_start.__dict__) == len( + pdv_end.__dict__, + ) diff --git a/tests/models/test_dvobject.py b/tests/models/test_dvobject.py index 26e96d3..9164011 100644 --- a/tests/models/test_dvobject.py +++ b/tests/models/test_dvobject.py @@ -1,4 +1,5 @@ """Dataverse data model tests.""" + from pyDataverse.models import DVObject From f148ef394938083ec24858a66885bc044428514f Mon Sep 17 00:00:00 2001 From: Jan Range Date: Tue, 30 Jul 2024 10:44:24 +0200 Subject: [PATCH 5/6] add test case where `json_str` is set to `None` --- tests/api/test_upload.py | 44 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/tests/api/test_upload.py b/tests/api/test_upload.py index 4b8f5f2..aa2b1d9 100644 --- a/tests/api/test_upload.py +++ b/tests/api/test_upload.py @@ -45,6 +45,41 @@ def test_file_upload(self): # Assert assert response.status_code == 200, "File upload failed." + def test_file_upload_without_metadata(self): + """ + Test case for uploading a file to a dataset without metadata. + + --> json_str will be set as None + + This test case performs the following steps: + 1. Creates a dataset using the provided metadata. + 2. Prepares a file for upload. + 3. Uploads the file to the dataset. + 4. Asserts that the file upload was successful. + + Raises: + AssertionError: If the file upload fails. + + """ + # Arrange + BASE_URL = os.getenv("BASE_URL").rstrip("/") + API_TOKEN = os.getenv("API_TOKEN") + + # Create dataset + metadata = json.load(open("tests/data/file_upload_ds_minimum.json")) + pid = self._create_dataset(BASE_URL, API_TOKEN, metadata) + api = NativeApi(BASE_URL, API_TOKEN) + + # Act + response = api.upload_datafile( + identifier=pid, + filename="tests/data/datafile.txt", + json_str=None, + ) + + # Assert + assert response.status_code == 200, "File upload failed." + def test_bulk_file_upload(self, create_mock_file): """ Test case for uploading bulk files to a dataset. @@ -161,9 +196,13 @@ def test_file_replacement(self): replaced_id, ) - assert data_file["description"] == "My description.", "Description does not match." + assert ( + data_file["description"] == "My description." + ), "Description does not match." assert data_file["categories"] == ["Data"], "Categories do not match." - assert file_metadata["directoryLabel"] == "some/other", "Directory label does not match." + assert ( + file_metadata["directoryLabel"] == "some/other" + ), "Directory label does not match." assert response.status_code == 200, "File replacement failed." assert ( replaced_content == mutated @@ -253,7 +292,6 @@ def _fetch_datafile_content( return response.content.decode("utf-8") - @staticmethod def _get_file_metadata( BASE_URL: str, From 484c3665d31d0e6e271d20cd4dcdbbf46c2ddf85 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Wed, 21 Aug 2024 12:15:26 +0200 Subject: [PATCH 6/6] use pydataverse in test setup --- tests/api/test_upload.py | 162 ++++++++++++++++----------------------- 1 file changed, 67 insertions(+), 95 deletions(-) diff --git a/tests/api/test_upload.py b/tests/api/test_upload.py index aa2b1d9..e8d8fef 100644 --- a/tests/api/test_upload.py +++ b/tests/api/test_upload.py @@ -4,7 +4,7 @@ import httpx -from pyDataverse.api import NativeApi +from pyDataverse.api import DataAccessApi, NativeApi from pyDataverse.models import Datafile @@ -132,9 +132,9 @@ def test_bulk_file_upload(self, create_mock_file): # Assert assert response.status_code == 200, "File upload failed." - def test_file_replacement(self): + def test_file_replacement_wo_metadata(self): """ - Test case for replacing a file in a dataset. + Test case for replacing a file in a dataset without metadata. Steps: 1. Create a dataset using the provided metadata. @@ -151,6 +151,7 @@ def test_file_replacement(self): metadata = json.load(open("tests/data/file_upload_ds_minimum.json")) pid = self._create_dataset(BASE_URL, API_TOKEN, metadata) api = NativeApi(BASE_URL, API_TOKEN) + data_api = DataAccessApi(BASE_URL, API_TOKEN) # Perform file upload df = Datafile({"pid": pid, "filename": "datafile.txt"}) @@ -161,7 +162,64 @@ def test_file_replacement(self): ) # Retrieve file ID - file_id = self._get_file_id(BASE_URL, API_TOKEN, pid) + file_id = response.json()["data"]["files"][0]["dataFile"]["id"] + + # Act + with tempfile.TemporaryDirectory() as tempdir: + original = open("tests/data/replace.xyz").read() + mutated = "Z" + original[1::] + mutated_path = os.path.join(tempdir, "replace.xyz") + + with open(mutated_path, "w") as f: + f.write(mutated) + + json_data = {} + + response = api.replace_datafile( + identifier=file_id, + filename=mutated_path, + json_str=json.dumps(json_data), + is_filepid=False, + ) + + # Assert + file_id = response.json()["data"]["files"][0]["dataFile"]["id"] + content = data_api.get_datafile(file_id, is_pid=False).text + + assert response.status_code == 200, "File replacement failed." + assert content == mutated, "File content does not match the expected content." + + def test_file_replacement_w_metadata(self): + """ + Test case for replacing a file in a dataset with metadata. + + Steps: + 1. Create a dataset using the provided metadata. + 2. Upload a datafile to the dataset. + 3. Replace the uploaded datafile with a mutated version. + 4. Verify that the file replacement was successful and the content matches the expected content. + """ + + # Arrange + BASE_URL = os.getenv("BASE_URL").rstrip("/") + API_TOKEN = os.getenv("API_TOKEN") + + # Create dataset + metadata = json.load(open("tests/data/file_upload_ds_minimum.json")) + pid = self._create_dataset(BASE_URL, API_TOKEN, metadata) + api = NativeApi(BASE_URL, API_TOKEN) + data_api = DataAccessApi(BASE_URL, API_TOKEN) + + # Perform file upload + df = Datafile({"pid": pid, "filename": "datafile.txt"}) + response = api.upload_datafile( + identifier=pid, + filename="tests/data/replace.xyz", + json_str=df.json(), + ) + + # Retrieve file ID + file_id = response.json()["data"]["files"][0]["dataFile"]["id"] # Act with tempfile.TemporaryDirectory() as tempdir: @@ -187,26 +245,19 @@ def test_file_replacement(self): ) # Assert - replaced_id = self._get_file_id(BASE_URL, API_TOKEN, pid) - file_metadata = self._get_file_metadata(BASE_URL, API_TOKEN, replaced_id) - data_file = file_metadata["dataFile"] - replaced_content = self._fetch_datafile_content( - BASE_URL, - API_TOKEN, - replaced_id, - ) + file_id = response.json()["data"]["files"][0]["dataFile"]["id"] + data_file = api.get_dataset(pid).json()["data"]["latestVersion"]["files"][0] + content = data_api.get_datafile(file_id, is_pid=False).text assert ( data_file["description"] == "My description." ), "Description does not match." assert data_file["categories"] == ["Data"], "Categories do not match." assert ( - file_metadata["directoryLabel"] == "some/other" + data_file["directoryLabel"] == "some/other" ), "Directory label does not match." assert response.status_code == 200, "File replacement failed." - assert ( - replaced_content == mutated - ), "File content does not match the expected content." + assert content == mutated, "File content does not match the expected content." @staticmethod def _create_dataset( @@ -238,82 +289,3 @@ def _create_dataset( response.raise_for_status() return response.json()["data"]["persistentId"] - - @staticmethod - def _get_file_id( - BASE_URL: str, - API_TOKEN: str, - pid: str, - ): - """ - Retrieves the file ID for a given persistent identifier (PID) in Dataverse. - - Args: - BASE_URL (str): The base URL of the Dataverse instance. - API_TOKEN (str): The API token for authentication. - pid (str): The persistent identifier (PID) of the dataset. - - Returns: - str: The file ID of the latest version of the dataset. - - Raises: - HTTPError: If the HTTP request to retrieve the file ID fails. - """ - response = httpx.get( - url=f"{BASE_URL}/api/datasets/:persistentId/?persistentId={pid}", - headers={"X-Dataverse-key": API_TOKEN}, - ) - - response.raise_for_status() - - return response.json()["data"]["latestVersion"]["files"][0]["dataFile"]["id"] - - @staticmethod - def _fetch_datafile_content( - BASE_URL: str, - API_TOKEN: str, - id: str, - ): - """ - Fetches the content of a datafile from the specified BASE_URL using the provided API_TOKEN. - - Args: - BASE_URL (str): The base URL of the Dataverse instance. - API_TOKEN (str): The API token for authentication. - id (str): The ID of the datafile. - - Returns: - str: The content of the datafile as a decoded UTF-8 string. - """ - url = f"{BASE_URL}/api/access/datafile/{id}" - headers = {"X-Dataverse-key": API_TOKEN} - response = httpx.get(url, headers=headers) - response.raise_for_status() - - return response.content.decode("utf-8") - - @staticmethod - def _get_file_metadata( - BASE_URL: str, - API_TOKEN: str, - id: str, - ): - """ - Retrieves the metadata for a file in Dataverse. - - Args: - BASE_URL (str): The base URL of the Dataverse instance. - API_TOKEN (str): The API token for authentication. - id (str): The ID of the file. - - Returns: - dict: The metadata for the file. - """ - response = httpx.get( - url=f"{BASE_URL}/api/files/{id}", - headers={"X-Dataverse-key": API_TOKEN}, - ) - - response.raise_for_status() - - return response.json()["data"]