From a6ec71dc3f4df1224da362829bac9b80825e44fd Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Wed, 26 Jul 2023 09:36:53 -0400 Subject: [PATCH 01/17] implement created case for user wpsoutputs data --- cowbird/handlers/impl/filesystem.py | 33 ++++++-- tests/test_filesystem.py | 118 ++++++++++++++++++++++------ 2 files changed, 122 insertions(+), 29 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index d07839c0..88f73b35 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -15,6 +15,7 @@ LOGGER = get_logger(__name__) NOTEBOOKS_DIR_NAME = "notebooks" +USER_WPS_OUTPUTS_USER_DIR_NAME = "wpsoutputs" class FileSystem(Handler, FSMonitor): @@ -51,7 +52,7 @@ def __init__(self, # Regex to find any directory or file found in the `users` output path of a 'bird' service # {self.wps_outputs_dir}//users//... - self.wps_outputs_users_regex = rf"^{self.wps_outputs_dir}/\w+/users/(\d+)/(.+)" + self.wps_outputs_users_regex = rf"^{self.wps_outputs_dir}/(\w+)/users/(\d+)/(.+)" def start_wpsoutputs_monitoring(self, monitoring: Monitoring) -> None: if os.path.exists(self.wps_outputs_dir): @@ -66,6 +67,9 @@ def get_resource_id(self, resource_full_name: str) -> int: def _get_user_workspace_dir(self, user_name: str) -> str: return os.path.join(self.workspace_dir, user_name) + def _get_wps_outputs_user_dir(self, user_name: str) -> str: + return os.path.join(self._get_user_workspace_dir(user_name), USER_WPS_OUTPUTS_USER_DIR_NAME) + def get_wps_outputs_public_dir(self) -> str: return os.path.join(self.workspace_dir, self.wps_outputs_public_subdir) @@ -120,10 +124,29 @@ def _get_public_hardlink(self, src_path: str) -> str: subpath = os.path.relpath(src_path, self.wps_outputs_dir) return os.path.join(self.get_wps_outputs_public_dir(), subpath) + def _get_user_hardlink(self, src_path, regex_match): + bird_name = regex_match.group(1) + user_id = int(regex_match.group(2)) + subpath = os.path.join(bird_name, regex_match.group(3)) + + magpie_handler = HandlerFactory().get_handler("Magpie") + user_name = magpie_handler.get_user_name_from_user_id(user_id) + user_workspace_dir = self._get_user_workspace_dir(user_name) + + if not os.path.exists(user_workspace_dir): + raise FileNotFoundError(f"User {user_name} workspace not found at path {user_workspace_dir}. New " + f"wpsoutput {src_path} not added to the user workspace.") + + # TODO: apply call to magpie/secure-data-proxy, remove link if exists and no permission, + # add link if doesn't exists and permission + + return os.path.join(self._get_wps_outputs_user_dir(user_name), subpath) + def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False) -> None: regex_match = re.search(self.wps_outputs_users_regex, src_path) - if regex_match: # user files # pylint: disable=R1705,no-else-return - return # TODO: implement user hardlinks + if regex_match: # user files + hardlink_path = self._get_user_hardlink(src_path, regex_match) + # TODO: check what to do if no access permitted and a hardlink already exists else: # public files hardlink_path = self._get_public_hardlink(src_path) @@ -172,8 +195,8 @@ def on_deleted(self, path: str) -> None: LOGGER.info("Removing link associated to the deleted path `%s`", path) regex_match = re.search(self.wps_outputs_users_regex, path) try: - if regex_match: # user paths # pylint: disable=R1705,no-else-return - return # TODO: implement user hardlinks + if regex_match: # user paths + linked_path = self._get_user_hardlink(path, regex_match) else: # public paths linked_path = self._get_public_hardlink(path) if os.path.isdir(linked_path): diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 5d536274..e87aef2e 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -8,12 +8,13 @@ import pytest import yaml +from dotenv import load_dotenv from webtest.app import TestApp from cowbird.handlers import HandlerFactory from cowbird.handlers.impl.filesystem import NOTEBOOKS_DIR_NAME from cowbird.typedefs import JSON -from tests import utils +from tests import utils, test_magpie CURR_DIR = Path(__file__).resolve().parent @@ -164,6 +165,34 @@ def test_create_user_missing_workspace_dir(self, mock_head_request): # The callback url should have been called if an exception occurred during the handler's operations. mock_head_request.assert_called_with(self.callback_url, verify=True, timeout=5) + @staticmethod + def check_created_test_cases(output_path, hardlink_path): + """ + Runs multiple test cases for the creation of hardlinks, which are the same for public and user files. + """ + # Make sure the hardlink doesn't already exist for the test cases + Path(hardlink_path).unlink(missing_ok=True) + + filesystem_handler = HandlerFactory().get_handler("FileSystem") + filesystem_handler.on_created(output_path) + assert os.stat(hardlink_path).st_nlink == 2 + + # A create event should still work if the target directory already exists + os.remove(hardlink_path) + filesystem_handler.on_created(output_path) + assert os.stat(hardlink_path).st_nlink == 2 + + # A create event should replace a hardlink path with the new file if the target path already exists + os.remove(hardlink_path) + with open(hardlink_path, mode="w", encoding="utf-8"): + pass + original_hardlink_ino = Path(hardlink_path).stat().st_ino + filesystem_handler.on_created(output_path) + new_hardlink_ino = Path(hardlink_path).stat().st_ino + assert original_hardlink_ino != new_hardlink_ino + assert Path(output_path).stat().st_ino == new_hardlink_ino + assert os.stat(hardlink_path).st_nlink == 2 + def test_public_wps_output_created(self): """ Tests creating a public wps output file. @@ -177,39 +206,23 @@ def test_public_wps_output_created(self): "wps_outputs_dir": self.wpsoutputs_dir}}}) # Create a test wps output file - output_subpath = "weaver/test_output.txt" + bird_name = "weaver" + output_subpath = f"{bird_name}/test_output.txt" output_file = os.path.join(self.wpsoutputs_dir, output_subpath) os.makedirs(os.path.dirname(output_file)) with open(output_file, mode="w", encoding="utf-8"): pass filesystem_handler = HandlerFactory().get_handler("FileSystem") - filesystem_handler.on_created(output_file) - hardlink_path = os.path.join(filesystem_handler.get_wps_outputs_public_dir(), output_subpath) - assert os.stat(hardlink_path).st_nlink == 2 - # A create event should still work if the target directory already exists - os.remove(hardlink_path) - filesystem_handler.on_created(output_file) - assert os.stat(hardlink_path).st_nlink == 2 - - # A create event should replace a hardlink path with the new file if the target path already exists - os.remove(hardlink_path) - with open(hardlink_path, mode="w", encoding="utf-8"): - pass - original_hardlink_ino = Path(hardlink_path).stat().st_ino - filesystem_handler.on_created(output_file) - new_hardlink_ino = Path(hardlink_path).stat().st_ino - assert original_hardlink_ino != new_hardlink_ino - assert Path(output_file).stat().st_ino == new_hardlink_ino - assert os.stat(hardlink_path).st_nlink == 2 + TestFileSystem.check_created_test_cases(output_file, hardlink_path) # A create event on a folder should not be processed (no corresponding target folder created) - target_weaver_dir = os.path.join(filesystem_handler.get_wps_outputs_public_dir(), "weaver") - shutil.rmtree(target_weaver_dir) - filesystem_handler.on_created(os.path.join(self.wpsoutputs_dir, "weaver")) - assert not os.path.exists(target_weaver_dir) + target_dir = os.path.join(filesystem_handler.get_wps_outputs_public_dir(), bird_name) + shutil.rmtree(target_dir) + filesystem_handler.on_created(os.path.join(self.wpsoutputs_dir, bird_name)) + assert not os.path.exists(target_dir) def test_public_wps_output_deleted(self): """ @@ -250,6 +263,63 @@ def test_public_wps_output_deleted(self): filesystem_handler.on_deleted(os.path.join(self.wpsoutputs_dir, "weaver")) assert not os.path.exists(weaver_linked_dir) + def test_user_wps_output_created(self): + """ + Tests creating a wps output for a user. + """ + load_dotenv(CURR_DIR / "../docker/.env.example") + self.get_test_app({ + "handlers": { + "Magpie": { + "active": True, + "url": os.getenv("COWBIRD_TEST_MAGPIE_URL"), + "admin_user": os.getenv("MAGPIE_ADMIN_USER"), + "admin_password": os.getenv("MAGPIE_ADMIN_PASSWORD")}, + "FileSystem": { + "active": True, + "workspace_dir": self.workspace_dir, + "jupyterhub_user_data_dir": self.jupyterhub_user_data_dir, + "wps_outputs_dir": self.wpsoutputs_dir}}}) + + # Reset test user + magpie_test_user = "test_user" + magpie_handler = HandlerFactory().get_handler("Magpie") + test_magpie.delete_user(magpie_handler, magpie_test_user) + user_id = test_magpie.create_user(magpie_handler, magpie_test_user, + "test@test.com", "qwertyqwerty", "users") + job_id = 1 + + bird_name = "weaver" + output_subpath = f"{job_id}/test_output.txt" + output_file = os.path.join(self.wpsoutputs_dir, f"{bird_name}/users/{user_id}/{output_subpath}") + # Hardlink for user files doesn't use the full subpath, but removes the redundant `users` and `{user_id}` parts. + hardlink_subpath = f"{bird_name}/{output_subpath}" + + # Create a test wps output file + os.makedirs(os.path.dirname(output_file)) + with open(output_file, mode="w", encoding="utf-8"): + pass + + filesystem_handler = HandlerFactory().get_handler("FileSystem") + # Error expected if the user workspace does not exist + with pytest.raises(FileNotFoundError): + filesystem_handler.on_created(output_file) + + # Create the user workspace + filesystem_handler.user_created(magpie_test_user) + + wps_outputs_user_dir = filesystem_handler._get_wps_outputs_user_dir(magpie_test_user) + hardlink_path = os.path.join(wps_outputs_user_dir, hardlink_subpath) + + self.check_created_test_cases(output_file, hardlink_path) + + # A create event on a folder should not be processed (no corresponding target folder created) + src_dir = os.path.join(self.wpsoutputs_dir, f"{bird_name}/users/{user_id}/{job_id}") + target_dir = os.path.join(wps_outputs_user_dir, f"{bird_name}/{job_id}") + shutil.rmtree(target_dir) + filesystem_handler.on_created(src_dir) + assert not os.path.exists(target_dir) + def test_resync(self): """ Tests resync operation for the handler. From ce64cc344cdc7055be55aca72f16fd83f6b4115d Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Fri, 28 Jul 2023 09:44:39 -0400 Subject: [PATCH 02/17] add support for a secure-data-proxy service in Magpie to handle user permissions on wpsoutputs data --- cowbird/handlers/impl/filesystem.py | 117 ++++++++--- tests/test_filesystem.py | 296 +++++++++++++++++++--------- 2 files changed, 300 insertions(+), 113 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index 88f73b35..b72c5102 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -1,8 +1,13 @@ import os import re import shutil +import stat from pathlib import Path -from typing import Any +from typing import Any, Tuple, cast + +from magpie.permissions import Access +from magpie.permissions import Permission as MagpiePermission +from magpie.services import ServiceAPI from cowbird.handlers import HandlerFactory from cowbird.handlers.handler import HANDLER_WORKSPACE_DIR_PARAM, Handler @@ -10,11 +15,12 @@ from cowbird.monitoring.monitoring import Monitoring from cowbird.permissions_synchronizer import Permission from cowbird.typedefs import SettingsType -from cowbird.utils import get_logger +from cowbird.utils import get_logger, update_filesystem_permissions LOGGER = get_logger(__name__) NOTEBOOKS_DIR_NAME = "notebooks" +SECURE_DATA_PROXY_NAME = "secure-data-proxy" USER_WPS_OUTPUTS_USER_DIR_NAME = "wpsoutputs" @@ -67,7 +73,7 @@ def get_resource_id(self, resource_full_name: str) -> int: def _get_user_workspace_dir(self, user_name: str) -> str: return os.path.join(self.workspace_dir, user_name) - def _get_wps_outputs_user_dir(self, user_name: str) -> str: + def get_wps_outputs_user_dir(self, user_name: str) -> str: return os.path.join(self._get_user_workspace_dir(user_name), USER_WPS_OUTPUTS_USER_DIR_NAME) def get_wps_outputs_public_dir(self) -> str: @@ -124,34 +130,88 @@ def _get_public_hardlink(self, src_path: str) -> str: subpath = os.path.relpath(src_path, self.wps_outputs_dir) return os.path.join(self.get_wps_outputs_public_dir(), subpath) - def _get_user_hardlink(self, src_path, regex_match): - bird_name = regex_match.group(1) - user_id = int(regex_match.group(2)) - subpath = os.path.join(bird_name, regex_match.group(3)) - - magpie_handler = HandlerFactory().get_handler("Magpie") - user_name = magpie_handler.get_user_name_from_user_id(user_id) + def _get_user_hardlink(self, src_path: str, bird_name: str, user_name: str, subpath: str) -> str: user_workspace_dir = self._get_user_workspace_dir(user_name) - if not os.path.exists(user_workspace_dir): raise FileNotFoundError(f"User {user_name} workspace not found at path {user_workspace_dir}. New " f"wpsoutput {src_path} not added to the user workspace.") - # TODO: apply call to magpie/secure-data-proxy, remove link if exists and no permission, - # add link if doesn't exists and permission + subpath = os.path.join(bird_name, subpath) + return os.path.join(self.get_wps_outputs_user_dir(user_name), subpath) - return os.path.join(self._get_wps_outputs_user_dir(user_name), subpath) + def _get_secure_data_proxy_file_perms(self, src_path: str, user_name: str) -> Tuple[bool, bool]: + """ + Finds a route from the `secure-data-proxy service` that matches the resource path (or one its parent resource) + and gets the user permissions on that route. + """ + magpie_handler = HandlerFactory().get_handler("Magpie") + sdp_svc_info = magpie_handler.get_service_info("secure-data-proxy") + # Find the closest related route resource + expected_route = re.sub(rf"^{self.wps_outputs_dir}", "wpsoutputs", src_path) + + # Finds the resource id of the route matching the resource or the closest matching parent route. + closest_res_id = None + resource = magpie_handler.get_resource(cast(int, sdp_svc_info["resource_id"])) + for segment in expected_route.split("/"): + child_res_id = None + for child in resource["children"].values(): + if child["resource_name"] == segment: + child_res_id = cast(int, child["resource_id"]) + resource = child + break + if not child_res_id: + break + closest_res_id = child_res_id + + if not closest_res_id: + # No resource was found to be corresponding to even some part of the expected route. + # Assume access is not allowed. + is_readable = False + is_writable = False + else: + # Resolve permissions + res_perms = magpie_handler.get_user_permissions_by_res_id(user=user_name, + res_id=closest_res_id, + effective=True)["permissions"] + read_access = [perm["access"] for perm in res_perms + if perm["name"] == MagpiePermission.READ.value][0] + write_access = [perm["access"] for perm in res_perms + if perm["name"] == MagpiePermission.WRITE.value][0] + is_readable = read_access == Access.ALLOW.value + is_writable = write_access == Access.ALLOW.value + return is_readable, is_writable def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False) -> None: regex_match = re.search(self.wps_outputs_users_regex, src_path) + access_allowed = True if regex_match: # user files - hardlink_path = self._get_user_hardlink(src_path, regex_match) - # TODO: check what to do if no access permitted and a hardlink already exists + magpie_handler = HandlerFactory().get_handler("Magpie") + user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) + hardlink_path = self._get_user_hardlink(src_path=src_path, + bird_name=regex_match.group(1), + user_name=user_name, + subpath=regex_match.group(3)) + api_services = magpie_handler.get_services_by_type(ServiceAPI.service_type) + if SECURE_DATA_PROXY_NAME not in api_services: + LOGGER.debug("`%s` service not found. Considering user wpsoutputs data as accessible by default.", + SECURE_DATA_PROXY_NAME) + else: # get and apply permissions if the secure-data-proxy exists + is_readable, is_writable = self._get_secure_data_proxy_file_perms(src_path, user_name) + + previous_perms = os.stat(src_path)[stat.ST_MODE] & 0o777 + new_perms = update_filesystem_permissions(previous_perms, + is_readable=is_readable, + is_writable=is_writable, + is_executable=False) + os.chmod(src_path, new_perms) + if not is_readable and not is_writable: + # If no permission on the file, the hardlink should not be created. + access_allowed = False else: # public files hardlink_path = self._get_public_hardlink(src_path) if os.path.exists(hardlink_path): - if not overwrite: + if not overwrite and access_allowed: # Hardlink already exists, nothing to do. return # Delete the existing file at the destination path to reset the hardlink path with the expected source. @@ -159,12 +219,16 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False) -> "created file.", hardlink_path) os.remove(hardlink_path) - os.makedirs(os.path.dirname(hardlink_path), exist_ok=True) - LOGGER.debug("Creating hardlink from file `%s` to the path `%s`", src_path, hardlink_path) - try: - os.link(src_path, hardlink_path) - except Exception as exc: - LOGGER.warning("Failed to create hardlink `%s` : %s", hardlink_path, exc) + if access_allowed: + os.makedirs(os.path.dirname(hardlink_path), exist_ok=True) + LOGGER.debug("Creating hardlink from file `%s` to the path `%s`", src_path, hardlink_path) + try: + os.link(src_path, hardlink_path) + except Exception as exc: + LOGGER.warning("Failed to create hardlink `%s` : %s", hardlink_path, exc) + else: + LOGGER.debug("Access to the wps output file `%s` is not allowed for the user. No hardlink created.", + src_path) def on_created(self, path: str) -> None: """ @@ -196,7 +260,12 @@ def on_deleted(self, path: str) -> None: regex_match = re.search(self.wps_outputs_users_regex, path) try: if regex_match: # user paths - linked_path = self._get_user_hardlink(path, regex_match) + magpie_handler = HandlerFactory().get_handler("Magpie") + user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) + linked_path = self._get_user_hardlink(src_path=path, + bird_name=regex_match.group(1), + user_name=user_name, + subpath=regex_match.group(3)) else: # public paths linked_path = self._get_public_hardlink(path) if os.path.isdir(linked_path): diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index e87aef2e..e37e5031 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -1,5 +1,6 @@ import logging import os +import re import shutil import tempfile import unittest @@ -9,12 +10,15 @@ import pytest import yaml from dotenv import load_dotenv +from magpie.models import Permission, Route +from magpie.permissions import Access, Scope +from magpie.services import ServiceAPI from webtest.app import TestApp from cowbird.handlers import HandlerFactory -from cowbird.handlers.impl.filesystem import NOTEBOOKS_DIR_NAME +from cowbird.handlers.impl.filesystem import NOTEBOOKS_DIR_NAME, SECURE_DATA_PROXY_NAME from cowbird.typedefs import JSON -from tests import utils, test_magpie +from tests import test_magpie, utils CURR_DIR = Path(__file__).resolve().parent @@ -22,13 +26,13 @@ @pytest.mark.filesystem class TestFileSystem(unittest.TestCase): """ - Test FileSystem operations. + Test FileSystem parent class, containing some utility functions and common setup/teardown operations. """ @classmethod def setUpClass(cls): cls.jupyterhub_user_data_dir = "/jupyterhub_user_data" - cls.test_username = "test_username" + cls.test_username = "test_user" cls.callback_url = "callback_url" # Mock monitoring to disable monitoring events and to trigger file events manually instead during tests. @@ -60,6 +64,41 @@ def get_test_app(self, cfg_data: JSON) -> TestApp: app = utils.get_test_app(settings={"cowbird.config_path": cfg_file}) return app + @staticmethod + def check_created_test_cases(output_path, hardlink_path): + """ + Runs multiple test cases for the creation of hardlinks, which are the same for public and user files. + """ + # Make sure the hardlink doesn't already exist for the test cases + Path(hardlink_path).unlink(missing_ok=True) + + # A create event on the file should create a corresponding hardlink + filesystem_handler = HandlerFactory().get_handler("FileSystem") + filesystem_handler.on_created(output_path) + assert os.stat(hardlink_path).st_nlink == 2 + + # A create event should still work if the target directory already exists + os.remove(hardlink_path) + filesystem_handler.on_created(output_path) + assert os.stat(hardlink_path).st_nlink == 2 + + # A create event should replace a hardlink path with the new file if the target path already exists + os.remove(hardlink_path) + with open(hardlink_path, mode="w", encoding="utf-8"): + pass + original_hardlink_ino = Path(hardlink_path).stat().st_ino + filesystem_handler.on_created(output_path) + new_hardlink_ino = Path(hardlink_path).stat().st_ino + assert original_hardlink_ino != new_hardlink_ino + assert Path(output_path).stat().st_ino == new_hardlink_ino + assert os.stat(hardlink_path).st_nlink == 2 + + +@pytest.mark.filesystem +class TestFileSystemBasic(TestFileSystem): + """ + Test FileSystem generic operations. + """ @patch("cowbird.api.webhooks.views.requests.head") def test_manage_user_workspace(self, mock_head_request): """ @@ -165,34 +204,6 @@ def test_create_user_missing_workspace_dir(self, mock_head_request): # The callback url should have been called if an exception occurred during the handler's operations. mock_head_request.assert_called_with(self.callback_url, verify=True, timeout=5) - @staticmethod - def check_created_test_cases(output_path, hardlink_path): - """ - Runs multiple test cases for the creation of hardlinks, which are the same for public and user files. - """ - # Make sure the hardlink doesn't already exist for the test cases - Path(hardlink_path).unlink(missing_ok=True) - - filesystem_handler = HandlerFactory().get_handler("FileSystem") - filesystem_handler.on_created(output_path) - assert os.stat(hardlink_path).st_nlink == 2 - - # A create event should still work if the target directory already exists - os.remove(hardlink_path) - filesystem_handler.on_created(output_path) - assert os.stat(hardlink_path).st_nlink == 2 - - # A create event should replace a hardlink path with the new file if the target path already exists - os.remove(hardlink_path) - with open(hardlink_path, mode="w", encoding="utf-8"): - pass - original_hardlink_ino = Path(hardlink_path).stat().st_ino - filesystem_handler.on_created(output_path) - new_hardlink_ino = Path(hardlink_path).stat().st_ino - assert original_hardlink_ino != new_hardlink_ino - assert Path(output_path).stat().st_ino == new_hardlink_ino - assert os.stat(hardlink_path).st_nlink == 2 - def test_public_wps_output_created(self): """ Tests creating a public wps output file. @@ -263,63 +274,6 @@ def test_public_wps_output_deleted(self): filesystem_handler.on_deleted(os.path.join(self.wpsoutputs_dir, "weaver")) assert not os.path.exists(weaver_linked_dir) - def test_user_wps_output_created(self): - """ - Tests creating a wps output for a user. - """ - load_dotenv(CURR_DIR / "../docker/.env.example") - self.get_test_app({ - "handlers": { - "Magpie": { - "active": True, - "url": os.getenv("COWBIRD_TEST_MAGPIE_URL"), - "admin_user": os.getenv("MAGPIE_ADMIN_USER"), - "admin_password": os.getenv("MAGPIE_ADMIN_PASSWORD")}, - "FileSystem": { - "active": True, - "workspace_dir": self.workspace_dir, - "jupyterhub_user_data_dir": self.jupyterhub_user_data_dir, - "wps_outputs_dir": self.wpsoutputs_dir}}}) - - # Reset test user - magpie_test_user = "test_user" - magpie_handler = HandlerFactory().get_handler("Magpie") - test_magpie.delete_user(magpie_handler, magpie_test_user) - user_id = test_magpie.create_user(magpie_handler, magpie_test_user, - "test@test.com", "qwertyqwerty", "users") - job_id = 1 - - bird_name = "weaver" - output_subpath = f"{job_id}/test_output.txt" - output_file = os.path.join(self.wpsoutputs_dir, f"{bird_name}/users/{user_id}/{output_subpath}") - # Hardlink for user files doesn't use the full subpath, but removes the redundant `users` and `{user_id}` parts. - hardlink_subpath = f"{bird_name}/{output_subpath}" - - # Create a test wps output file - os.makedirs(os.path.dirname(output_file)) - with open(output_file, mode="w", encoding="utf-8"): - pass - - filesystem_handler = HandlerFactory().get_handler("FileSystem") - # Error expected if the user workspace does not exist - with pytest.raises(FileNotFoundError): - filesystem_handler.on_created(output_file) - - # Create the user workspace - filesystem_handler.user_created(magpie_test_user) - - wps_outputs_user_dir = filesystem_handler._get_wps_outputs_user_dir(magpie_test_user) - hardlink_path = os.path.join(wps_outputs_user_dir, hardlink_subpath) - - self.check_created_test_cases(output_file, hardlink_path) - - # A create event on a folder should not be processed (no corresponding target folder created) - src_dir = os.path.join(self.wpsoutputs_dir, f"{bird_name}/users/{user_id}/{job_id}") - target_dir = os.path.join(wps_outputs_user_dir, f"{bird_name}/{job_id}") - shutil.rmtree(target_dir) - filesystem_handler.on_created(src_dir) - assert not os.path.exists(target_dir) - def test_resync(self): """ Tests resync operation for the handler. @@ -406,3 +360,167 @@ def test_resync_no_src_wpsoutputs(self): # Check that previous file still exists, since resyncing was skipped because of the missing source folder assert os.path.exists(old_nested_file) + + +@pytest.mark.filesystem +class TestFileSystemWpsOutputsUser(TestFileSystem): + """ + FileSystem tests specific to the user wps outputs data. + """ + def setUp(self): + super().setUp() + load_dotenv(CURR_DIR / "../docker/.env.example") + self.get_test_app({ + "handlers": { + "Magpie": { + "active": True, + "url": os.getenv("COWBIRD_TEST_MAGPIE_URL"), + "admin_user": os.getenv("MAGPIE_ADMIN_USER"), + "admin_password": os.getenv("MAGPIE_ADMIN_PASSWORD")}, + "FileSystem": { + "active": True, + "workspace_dir": self.workspace_dir, + "jupyterhub_user_data_dir": self.jupyterhub_user_data_dir, + "wps_outputs_dir": self.wpsoutputs_dir}}}) + + # Reset test user + magpie_handler = HandlerFactory().get_handler("Magpie") + test_magpie.delete_user(magpie_handler, self.test_username) + self.user_id = test_magpie.create_user(magpie_handler, self.test_username, + "test@test.com", "qwertyqwerty", "users") + + self.job_id = 1 + self.bird_name = "weaver" + self.output_subpath = f"{self.job_id}/test_output.txt" + self.output_file = os.path.join(self.wpsoutputs_dir, + f"{self.bird_name}/users/{self.user_id}/{self.output_subpath}") + filesystem_handler = HandlerFactory().get_handler("FileSystem") + self.wps_outputs_user_dir = filesystem_handler.get_wps_outputs_user_dir(self.test_username) + # Hardlink for user files doesn't use the full subpath, but removes the redundant `users` and `{user_id}` parts. + self.hardlink_path = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.output_subpath}") + + # Create the test wps output file + os.makedirs(os.path.dirname(self.output_file)) + with open(self.output_file, mode="w", encoding="utf-8"): + pass + + self.secure_data_proxy_name = SECURE_DATA_PROXY_NAME + # Delete the service if it already exists + test_magpie.delete_service(magpie_handler, self.secure_data_proxy_name) + + def create_secure_data_proxy_service(self): + """ + Generates a new secure-data-proxy service in Magpie app. + """ + # Create service + data = { + "service_name": self.secure_data_proxy_name, + "service_type": ServiceAPI.service_type, + "service_sync_type": ServiceAPI.service_type, + "service_url": f"http://localhost:9000/{self.secure_data_proxy_name}", + "configuration": {} + } + return test_magpie.create_service(HandlerFactory().get_handler("Magpie"), data) + + def test_user_wps_output_created(self): + """ + Tests creating wps outputs for a user. + """ + filesystem_handler = HandlerFactory().get_handler("FileSystem") + + # Error expected if the user workspace does not exist + with pytest.raises(FileNotFoundError): + filesystem_handler.on_created(self.output_file) + + # Create the user workspace + filesystem_handler.user_created(self.test_username) + + TestFileSystem.check_created_test_cases(self.output_file, self.hardlink_path) + + # A create event on a folder should not be processed (no corresponding target folder created) + src_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{self.job_id}") + target_dir = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.job_id}") + shutil.rmtree(target_dir) + filesystem_handler.on_created(src_dir) + assert not os.path.exists(target_dir) + + def test_user_wps_output_created_secure_data_proxy(self): + """ + Tests creating wps outputs for a user when Magpie uses a secure-data-proxy service to manage access permissions + to the wps output data. + """ + filesystem_handler = HandlerFactory().get_handler("FileSystem") + magpie_handler = HandlerFactory().get_handler("Magpie") + filesystem_handler.user_created(self.test_username) + svc_id = self.create_secure_data_proxy_service() + + # Note that the following test cases are made to be executed in a specific order and are not interchangeable. + test_cases = [{ + # If secure-data-proxy service exists but no route is defined for wpsoutputs, + # assume access is not allowed and check if no hardlink is created. + "routes_to_create": [], + "permissions_cases": [("", "", False, 0o660)] + }, { + # Permission applied only on a parent resource + # If the route is only defined on a parent resource and no route are defined for the actual file, + # assume access is the same as the access of the parent, and hardlink should be created accordingly. + "routes_to_create": ["wpsoutputs"], + "permissions_cases": [(Permission.READ.value, Access.DENY.value, False, 0o660), + (Permission.READ.value, Access.ALLOW.value, True, 0o664), + (Permission.WRITE.value, Access.ALLOW.value, True, 0o666), + (Permission.WRITE.value, Access.DENY.value, True, 0o664)] + }, { + # Permission applied on the actual resource - Test access with an exact route match + # Create the rest of the route to get a route that match the actual full path of the resource + "routes_to_create": re.sub(rf"^{self.wpsoutputs_dir}", "", self.output_file).strip("/").split("/"), + "permissions_cases": [(Permission.READ.value, Access.DENY.value, False, 0o660), + (Permission.READ.value, Access.ALLOW.value, True, 0o664), + (Permission.WRITE.value, Access.ALLOW.value, True, 0o666), + (Permission.WRITE.value, Access.DENY.value, True, 0o664)]}] + # Resource id of the last existing route resource found from the path of the test file + last_res_id = svc_id + + for test_case in test_cases: + # Create routes found in list + for route in test_case["routes_to_create"]: + last_res_id = magpie_handler.create_resource(route, Route.resource_type_name, last_res_id) + for perm_name, perm_access, expecting_created_file, expected_file_perms in test_case["permissions_cases"]: + # Reset hardlink file for each test + shutil.rmtree(self.wps_outputs_user_dir, ignore_errors=True) + + # Create permission if required + if perm_name and perm_access: + magpie_handler.create_permission_by_user_and_res_id( + user_name=self.test_username, + res_id=last_res_id, + perm_name=perm_name, + perm_access=perm_access, + perm_scope=Scope.MATCH.value) + + # Check if file is created according to permissions + filesystem_handler.on_created(self.output_file) + assert expecting_created_file == os.path.exists(self.hardlink_path) + utils.check_path_permissions(self.output_file, expected_file_perms) + + def test_user_wps_output_deleted(self): + """ + Tests deleting wps outputs for a user. + """ + filesystem_handler = HandlerFactory().get_handler("FileSystem") + + # Create the user workspace + filesystem_handler.user_created(self.test_username) + + # Basic test cases for deleting user wps outputs. More extensive delete test cases are done in the public tests. + # Test deleting a user file. + filesystem_handler.on_created(self.output_file) + assert os.path.exists(self.hardlink_path) + filesystem_handler.on_deleted(self.output_file) + assert not os.path.exists(self.hardlink_path) + + # Test deleting a user directory + src_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{self.job_id}") + target_dir = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.job_id}") + assert os.path.exists(target_dir) + filesystem_handler.on_deleted(src_dir) + assert not os.path.exists(target_dir) From 441e34a276f244930fc84355cc68541e42316531 Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Fri, 28 Jul 2023 10:51:26 -0400 Subject: [PATCH 03/17] implement resync section for the user wpsoutputs data --- cowbird/handlers/impl/filesystem.py | 30 ++++++++++++------- cowbird/handlers/impl/magpie.py | 9 ++++++ tests/test_filesystem.py | 46 ++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 12 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index b72c5102..a1ab58e8 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -292,17 +292,25 @@ def resync(self) -> None: else: # Delete the content of the linked public folder, but keep the folder to avoid breaking the volume # if the folder is mounted on a Docker container - for filename in os.listdir(self.get_wps_outputs_public_dir()): - file_path = os.path.join(self.get_wps_outputs_public_dir(), filename) - try: - if os.path.isfile(file_path): - os.remove(file_path) - elif os.path.isdir(file_path): - shutil.rmtree(file_path) - except Exception as exc: - LOGGER.error("Failed to delete path [%s] : %s", file_path, exc) - - # TODO: remove the users hardlinks to wpsoutputs too + wps_outputs_public_dir = self.get_wps_outputs_public_dir() + if not os.path.exists(wps_outputs_public_dir): + LOGGER.debug("Linked public wps outputs data folder [%s] does not exist. " + "No public file to delete for the resync operation.", wps_outputs_public_dir) + else: + for filename in os.listdir(wps_outputs_public_dir): + file_path = os.path.join(wps_outputs_public_dir, filename) + try: + if os.path.isfile(file_path): + os.remove(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as exc: + LOGGER.error("Failed to delete path [%s] : %s", file_path, exc) + + # Delete wps outputs hardlinks for each user + user_list = HandlerFactory().get_handler("Magpie").get_user_list() + for user_name in user_list: + shutil.rmtree(self.get_wps_outputs_user_dir(user_name), ignore_errors=True) # Create all hardlinks from files of the current source folder for root, _, filenames in os.walk(self.wps_outputs_dir): diff --git a/cowbird/handlers/impl/magpie.py b/cowbird/handlers/impl/magpie.py index 1c407603..6207f4c1 100644 --- a/cowbird/handlers/impl/magpie.py +++ b/cowbird/handlers/impl/magpie.py @@ -216,6 +216,15 @@ def get_geoserver_layer_res_id(self, workspace_name: str, layer_name: str, creat parent_id=workspace_res_id) return layer_res_id + def get_user_list(self) -> List[str]: + """ + Returns the list of all Magpie users. + """ + resp = self._send_request(method="GET", url=f"{self.url}/users", params={"detail": False}) + if resp.status_code != 200: + raise MagpieHttpError(f"Could not find the list of users. HttpError {resp.status_code} : {resp.text}") + return resp.json()["user_names"] + def get_user_name_from_user_id(self, user_id: int) -> str: """ Finds the name of a user from his user id. diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index e37e5031..9910230a 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -278,8 +278,14 @@ def test_resync(self): """ Tests resync operation for the handler. """ + load_dotenv(CURR_DIR / "../docker/.env.example") app = self.get_test_app({ "handlers": { + "Magpie": { + "active": True, + "url": os.getenv("COWBIRD_TEST_MAGPIE_URL"), + "admin_user": os.getenv("MAGPIE_ADMIN_USER"), + "admin_password": os.getenv("MAGPIE_ADMIN_PASSWORD")}, "FileSystem": { "active": True, "workspace_dir": self.workspace_dir, @@ -370,7 +376,7 @@ class TestFileSystemWpsOutputsUser(TestFileSystem): def setUp(self): super().setUp() load_dotenv(CURR_DIR / "../docker/.env.example") - self.get_test_app({ + self.app = self.get_test_app({ "handlers": { "Magpie": { "active": True, @@ -524,3 +530,41 @@ def test_user_wps_output_deleted(self): assert os.path.exists(target_dir) filesystem_handler.on_deleted(src_dir) assert not os.path.exists(target_dir) + + def test_resync(self): + """ + Tests resync operation on wpsoutputs user data. + """ + filesystem_handler = HandlerFactory().get_handler("FileSystem") + filesystem_handler.user_created(self.test_username) + + test_dir = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.job_id}") + + # Create a file in a subfolder of the linked folder that should be removed by the resync + old_nested_file = os.path.join(test_dir, "old_dir/old_file.txt") + os.makedirs(os.path.dirname(old_nested_file)) + with open(old_nested_file, mode="w", encoding="utf-8"): + pass + + # Create an empty subfolder in the linked folder that should be removed by the resync + old_subdir = os.path.join(test_dir, "empty_subdir") + os.mkdir(old_subdir) + + # Create a new empty dir (should not appear in the resynced wpsoutputs since only files are processed) + new_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/new_dir") + os.mkdir(new_dir) + new_dir_linked_path = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/new_dir") + + # Check that old files exist before applying the resync + assert not os.path.exists(self.hardlink_path) + assert os.path.exists(old_nested_file) + assert os.path.exists(old_subdir) + + resp = utils.test_request(self.app, "PUT", "/handlers/FileSystem/resync") + + # Check that new hardlinks are generated and old files are removed + assert resp.status_code == 200 + assert os.stat(self.hardlink_path).st_nlink == 2 + assert not os.path.exists(new_dir_linked_path) + assert not os.path.exists(old_nested_file) + assert not os.path.exists(old_subdir) From 8b0cddc3fca66928687696f6e3d4049fa370afb8 Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Fri, 28 Jul 2023 12:07:38 -0400 Subject: [PATCH 04/17] generate user wpsoutputs hardlinks, if any, upon user creation --- cowbird/handlers/impl/filesystem.py | 25 +++++++++++++++++++------ tests/test_filesystem.py | 21 ++++++++++++++------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index a1ab58e8..28e4c69f 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -70,11 +70,11 @@ def start_wpsoutputs_monitoring(self, monitoring: Monitoring) -> None: def get_resource_id(self, resource_full_name: str) -> int: raise NotImplementedError - def _get_user_workspace_dir(self, user_name: str) -> str: + def get_user_workspace_dir(self, user_name: str) -> str: return os.path.join(self.workspace_dir, user_name) def get_wps_outputs_user_dir(self, user_name: str) -> str: - return os.path.join(self._get_user_workspace_dir(user_name), USER_WPS_OUTPUTS_USER_DIR_NAME) + return os.path.join(self.get_user_workspace_dir(user_name), USER_WPS_OUTPUTS_USER_DIR_NAME) def get_wps_outputs_public_dir(self) -> str: return os.path.join(self.workspace_dir, self.wps_outputs_public_subdir) @@ -102,7 +102,7 @@ def _create_symlink_dir(src: str, dst: str) -> None: os.symlink(src, dst, target_is_directory=True) def user_created(self, user_name: str) -> None: - user_workspace_dir = self._get_user_workspace_dir(user_name) + user_workspace_dir = self.get_user_workspace_dir(user_name) try: os.mkdir(user_workspace_dir) except FileExistsError: @@ -112,8 +112,15 @@ def user_created(self, user_name: str) -> None: FileSystem._create_symlink_dir(src=self._get_jupyterhub_user_data_dir(user_name), dst=os.path.join(user_workspace_dir, NOTEBOOKS_DIR_NAME)) + # Create all hardlinks from the user wps outputs data + for root, _, filenames in os.walk(self.wps_outputs_dir): + for file in filenames: + full_path = os.path.join(root, file) + self._create_wpsoutputs_hardlink(src_path=full_path, overwrite=True, + process_user_files=True, process_public_files=False) + def user_deleted(self, user_name: str) -> None: - user_workspace_dir = self._get_user_workspace_dir(user_name) + user_workspace_dir = self.get_user_workspace_dir(user_name) try: shutil.rmtree(user_workspace_dir) except FileNotFoundError: @@ -131,7 +138,7 @@ def _get_public_hardlink(self, src_path: str) -> str: return os.path.join(self.get_wps_outputs_public_dir(), subpath) def _get_user_hardlink(self, src_path: str, bird_name: str, user_name: str, subpath: str) -> str: - user_workspace_dir = self._get_user_workspace_dir(user_name) + user_workspace_dir = self.get_user_workspace_dir(user_name) if not os.path.exists(user_workspace_dir): raise FileNotFoundError(f"User {user_name} workspace not found at path {user_workspace_dir}. New " f"wpsoutput {src_path} not added to the user workspace.") @@ -181,10 +188,14 @@ def _get_secure_data_proxy_file_perms(self, src_path: str, user_name: str) -> Tu is_writable = write_access == Access.ALLOW.value return is_readable, is_writable - def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False) -> None: + def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, + process_user_files: bool = True, process_public_files: bool = True) -> None: regex_match = re.search(self.wps_outputs_users_regex, src_path) access_allowed = True if regex_match: # user files + if not process_user_files: + return + magpie_handler = HandlerFactory().get_handler("Magpie") user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) hardlink_path = self._get_user_hardlink(src_path=src_path, @@ -208,6 +219,8 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False) -> # If no permission on the file, the hardlink should not be created. access_allowed = False else: # public files + if not process_public_files: + return hardlink_path = self._get_public_hardlink(src_path) if os.path.exists(hardlink_path): diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 9910230a..5ab57314 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -390,17 +390,18 @@ def setUp(self): "wps_outputs_dir": self.wpsoutputs_dir}}}) # Reset test user + filesystem_handler = HandlerFactory().get_handler("FileSystem") magpie_handler = HandlerFactory().get_handler("Magpie") test_magpie.delete_user(magpie_handler, self.test_username) self.user_id = test_magpie.create_user(magpie_handler, self.test_username, "test@test.com", "qwertyqwerty", "users") + filesystem_handler.user_created(self.test_username) self.job_id = 1 self.bird_name = "weaver" self.output_subpath = f"{self.job_id}/test_output.txt" self.output_file = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{self.output_subpath}") - filesystem_handler = HandlerFactory().get_handler("FileSystem") self.wps_outputs_user_dir = filesystem_handler.get_wps_outputs_user_dir(self.test_username) # Hardlink for user files doesn't use the full subpath, but removes the redundant `users` and `{user_id}` parts. self.hardlink_path = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.output_subpath}") @@ -435,6 +436,7 @@ def test_user_wps_output_created(self): filesystem_handler = HandlerFactory().get_handler("FileSystem") # Error expected if the user workspace does not exist + shutil.rmtree(filesystem_handler.get_user_workspace_dir(self.test_username)) with pytest.raises(FileNotFoundError): filesystem_handler.on_created(self.output_file) @@ -450,6 +452,17 @@ def test_user_wps_output_created(self): filesystem_handler.on_created(src_dir) assert not os.path.exists(target_dir) + def test_user_created(self): + """ + Tests if creating a user generates the hardlinks to the pre-existing user wpsoutputs data. + """ + filesystem_handler = HandlerFactory().get_handler("FileSystem") + shutil.rmtree(filesystem_handler.get_user_workspace_dir(self.test_username)) + + # Simulate a user_created event and check that the expected hardlink is generated. + filesystem_handler.user_created(self.test_username) + assert os.stat(self.hardlink_path).st_nlink == 2 + def test_user_wps_output_created_secure_data_proxy(self): """ Tests creating wps outputs for a user when Magpie uses a secure-data-proxy service to manage access permissions @@ -457,7 +470,6 @@ def test_user_wps_output_created_secure_data_proxy(self): """ filesystem_handler = HandlerFactory().get_handler("FileSystem") magpie_handler = HandlerFactory().get_handler("Magpie") - filesystem_handler.user_created(self.test_username) svc_id = self.create_secure_data_proxy_service() # Note that the following test cases are made to be executed in a specific order and are not interchangeable. @@ -514,9 +526,6 @@ def test_user_wps_output_deleted(self): """ filesystem_handler = HandlerFactory().get_handler("FileSystem") - # Create the user workspace - filesystem_handler.user_created(self.test_username) - # Basic test cases for deleting user wps outputs. More extensive delete test cases are done in the public tests. # Test deleting a user file. filesystem_handler.on_created(self.output_file) @@ -535,8 +544,6 @@ def test_resync(self): """ Tests resync operation on wpsoutputs user data. """ - filesystem_handler = HandlerFactory().get_handler("FileSystem") - filesystem_handler.user_created(self.test_username) test_dir = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.job_id}") From 7eeada5003fed305fea42fdb196e7e360a32998d Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Mon, 7 Aug 2023 13:01:42 -0400 Subject: [PATCH 05/17] implement permission_created and permission_deleted events to sync secure-data-proxy perms with wpsoutputs --- cowbird/handlers/impl/filesystem.py | 193 +++++++++++++----- cowbird/handlers/impl/geoserver.py | 52 +---- cowbird/handlers/impl/magpie.py | 19 ++ cowbird/utils.py | 35 +++- tests/test_filesystem.py | 293 +++++++++++++++++++++++++++- tests/test_magpie.py | 17 ++ 6 files changed, 513 insertions(+), 96 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index 28e4c69f..fa0b0b26 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -1,9 +1,8 @@ import os import re import shutil -import stat from pathlib import Path -from typing import Any, Tuple, cast +from typing import Any, Tuple, cast, List from magpie.permissions import Access from magpie.permissions import Permission as MagpiePermission @@ -14,8 +13,8 @@ from cowbird.monitoring.fsmonitor import FSMonitor from cowbird.monitoring.monitoring import Monitoring from cowbird.permissions_synchronizer import Permission -from cowbird.typedefs import SettingsType -from cowbird.utils import get_logger, update_filesystem_permissions +from cowbird.typedefs import JSON, SettingsType +from cowbird.utils import get_logger, apply_new_path_permissions LOGGER = get_logger(__name__) @@ -58,7 +57,7 @@ def __init__(self, # Regex to find any directory or file found in the `users` output path of a 'bird' service # {self.wps_outputs_dir}//users//... - self.wps_outputs_users_regex = rf"^{self.wps_outputs_dir}/(\w+)/users/(\d+)/(.+)" + self.wps_outputs_user_data_regex = rf"^{self.wps_outputs_dir}/(\w+)/users/(\d+)/(.+)" def start_wpsoutputs_monitoring(self, monitoring: Monitoring) -> None: if os.path.exists(self.wps_outputs_dir): @@ -188,9 +187,41 @@ def _get_secure_data_proxy_file_perms(self, src_path: str, user_name: str) -> Tu is_writable = write_access == Access.ALLOW.value return is_readable, is_writable + def update_secure_data_proxy_path_perms(self, src_path: str, user_name: str) -> bool: + """ + Gets a path's permissions from the secure-data-proxy service and updates the file system permissions + accordingly. + + Returns a boolean to indicate if the user should have some type of access to the path or not. + """ + is_readable, is_writable = self._get_secure_data_proxy_file_perms(src_path, user_name) + apply_new_path_permissions(src_path, is_readable, is_writable, is_executable=False) + + access_allowed = True + if not is_readable and not is_writable: + # If no permission on the file, the hardlink should not be created. + access_allowed = False + return access_allowed + + @staticmethod + def create_hardlink_path(src_path: str, hardlink_path: str, access_allowed: bool) -> None: + """ + Creates a hardlink path from a source file, if the user has access rights. + """ + if access_allowed: + os.makedirs(os.path.dirname(hardlink_path), exist_ok=True) + LOGGER.debug("Creating hardlink from file `%s` to the path `%s`", src_path, hardlink_path) + try: + os.link(src_path, hardlink_path) + except Exception as exc: + LOGGER.warning("Failed to create hardlink `%s` : %s", hardlink_path, exc) + else: + LOGGER.debug("Access to the wps output file `%s` is not allowed for the user. No hardlink created.", + src_path) + def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, process_user_files: bool = True, process_public_files: bool = True) -> None: - regex_match = re.search(self.wps_outputs_users_regex, src_path) + regex_match = re.search(self.wps_outputs_user_data_regex, src_path) access_allowed = True if regex_match: # user files if not process_user_files: @@ -206,18 +237,9 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, if SECURE_DATA_PROXY_NAME not in api_services: LOGGER.debug("`%s` service not found. Considering user wpsoutputs data as accessible by default.", SECURE_DATA_PROXY_NAME) - else: # get and apply permissions if the secure-data-proxy exists - is_readable, is_writable = self._get_secure_data_proxy_file_perms(src_path, user_name) - - previous_perms = os.stat(src_path)[stat.ST_MODE] & 0o777 - new_perms = update_filesystem_permissions(previous_perms, - is_readable=is_readable, - is_writable=is_writable, - is_executable=False) - os.chmod(src_path, new_perms) - if not is_readable and not is_writable: - # If no permission on the file, the hardlink should not be created. - access_allowed = False + apply_new_path_permissions(src_path, True, True, False) + else: # get access and apply permissions if the secure-data-proxy exists + access_allowed = self.update_secure_data_proxy_path_perms(src_path, user_name) else: # public files if not process_public_files: return @@ -232,16 +254,7 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, "created file.", hardlink_path) os.remove(hardlink_path) - if access_allowed: - os.makedirs(os.path.dirname(hardlink_path), exist_ok=True) - LOGGER.debug("Creating hardlink from file `%s` to the path `%s`", src_path, hardlink_path) - try: - os.link(src_path, hardlink_path) - except Exception as exc: - LOGGER.warning("Failed to create hardlink `%s` : %s", hardlink_path, exc) - else: - LOGGER.debug("Access to the wps output file `%s` is not allowed for the user. No hardlink created.", - src_path) + self.create_hardlink_path(src_path, hardlink_path, access_allowed) def on_created(self, path: str) -> None: """ @@ -262,6 +275,37 @@ def on_modified(self, path: str) -> None: """ # Nothing to do for files in the wps_outputs folder, since hardlinks are updated automatically. + def _delete_wpsoutputs_hardlink(self, src_path: str, + process_user_files: bool = True, process_public_files: bool = True) -> bool: + """ + Deletes the hardlink path that corresponds to the input source path. + + Returns a bool to indicate if a hardlink path was deleted or not. + """ + regex_match = re.search(self.wps_outputs_user_data_regex, src_path) + try: + if regex_match: # user paths + if not process_user_files: + return False + magpie_handler = HandlerFactory().get_handler("Magpie") + user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) + linked_path = self._get_user_hardlink(src_path=src_path, + bird_name=regex_match.group(1), + user_name=user_name, + subpath=regex_match.group(3)) + else: # public paths + if not process_public_files: + return False + linked_path = self._get_public_hardlink(src_path) + if os.path.isdir(linked_path): + os.rmdir(linked_path) + else: + os.remove(linked_path) + return True + except FileNotFoundError: + LOGGER.debug("No linked path to delete for the `on_deleted` event of the wpsoutput path `%s`.", src_path) + return False + def on_deleted(self, path: str) -> None: """ Called when a path is deleted. @@ -270,29 +314,82 @@ def on_deleted(self, path: str) -> None: """ if Path(self.wps_outputs_dir) in Path(path).parents: LOGGER.info("Removing link associated to the deleted path `%s`", path) - regex_match = re.search(self.wps_outputs_users_regex, path) - try: - if regex_match: # user paths - magpie_handler = HandlerFactory().get_handler("Magpie") - user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) - linked_path = self._get_user_hardlink(src_path=path, - bird_name=regex_match.group(1), - user_name=user_name, - subpath=regex_match.group(3)) - else: # public paths - linked_path = self._get_public_hardlink(path) - if os.path.isdir(linked_path): - os.rmdir(linked_path) - else: - os.remove(linked_path) - except FileNotFoundError: - LOGGER.debug("No linked path to delete for the `on_deleted` event of the wpsoutput path `%s`.", path) + self._delete_wpsoutputs_hardlink(path) + + @staticmethod + def _check_if_res_from_secure_data_proxy(res_tree: List[JSON]) -> bool: + """ + Checks if the resource if part of a secure-data-proxy service of type API. + """ + root_res_info = res_tree[0] + if root_res_info["resource_name"] == SECURE_DATA_PROXY_NAME: + svc_info = HandlerFactory().get_handler("Magpie").get_service_info(SECURE_DATA_PROXY_NAME) + if svc_info["service_type"] == ServiceAPI.service_type: + return True + + # No secure-data-proxy with the expected service type + return False + + def _update_permissions_on_filesystem(self, permission: Permission) -> None: + magpie_handler = HandlerFactory().get_handler("Magpie") + res_tree = magpie_handler.get_parents_resource_tree(permission.resource_id) + + if self._check_if_res_from_secure_data_proxy(res_tree): + full_route = self.wps_outputs_dir + # Add subpath if the resource is a child of the main wpsoutputs resource + if len(res_tree) > 2: + child_route = "/".join([res["resource_name"] for res in res_tree[2:]]) + full_route = os.path.join(full_route, child_route) + + # Find all users related to the permission + if permission.user: + users = {magpie_handler.get_user_id_from_user_name(permission.user): permission.user} + else: + # Find all users from the group + users = {} + for username in magpie_handler.get_user_names_by_group_name(permission.group): + users[magpie_handler.get_user_id_from_user_name(username)] = username + + # Find all contained user paths + user_routes = {} + if os.path.isfile(full_route): + # use current route directly if it's a user data file + regex_match = re.search(self.wps_outputs_user_data_regex, full_route) + if regex_match and int(regex_match.group(2)) in users: + user_routes[full_route] = regex_match + else: # dir case, browse to find all children user file paths + for root, _, filenames in os.walk(full_route): + for file in filenames: + full_path = os.path.join(root, file) + regex_match = re.search(self.wps_outputs_user_data_regex, full_path) + if regex_match and int(regex_match.group(2)) in users: + user_routes[full_path] = regex_match + + # Update permissions for all found user paths + for user_path, path_regex_match in user_routes.items(): + user_name = users[int(path_regex_match.group(2))] + access_allowed = self.update_secure_data_proxy_path_perms(user_path, user_name) + try: + hardlink_path = self._get_user_hardlink(src_path=user_path, + bird_name=path_regex_match.group(1), + user_name=user_name, + subpath=path_regex_match.group(3)) + except FileNotFoundError: + LOGGER.debug("Failed to find a hardlink path corresponding to the source path [%s]. The user `%s` " + "should already have an existing workspace.", + user_path, user_name) + continue + + # Resync hardlink path + if os.path.exists(hardlink_path): + os.remove(hardlink_path) + self.create_hardlink_path(user_path, hardlink_path, access_allowed) def permission_created(self, permission: Permission) -> None: - raise NotImplementedError + self._update_permissions_on_filesystem(permission) def permission_deleted(self, permission: Permission) -> None: - raise NotImplementedError + self._update_permissions_on_filesystem(permission) def resync(self) -> None: """ @@ -331,4 +428,4 @@ def resync(self) -> None: full_path = os.path.join(root, file) self._create_wpsoutputs_hardlink(src_path=full_path, overwrite=True) # TODO: add resync of the user_workspace symlinks to the jupyterhub dirs, - # will be added during the resync task implementation + # will be added during the resync task implementation diff --git a/cowbird/handlers/impl/geoserver.py b/cowbird/handlers/impl/geoserver.py index 8aa751cd..1d619b63 100644 --- a/cowbird/handlers/impl/geoserver.py +++ b/cowbird/handlers/impl/geoserver.py @@ -11,7 +11,6 @@ from magpie.models import Layer, Workspace from magpie.permissions import Access, Scope -from cowbird.constants import DEFAULT_ADMIN_GID, DEFAULT_ADMIN_UID from cowbird.handlers.handler import HANDLER_URL_PARAM, HANDLER_WORKSPACE_DIR_PARAM, Handler from cowbird.handlers.handler_factory import HandlerFactory from cowbird.handlers.impl.magpie import GEOSERVER_READ_PERMISSIONS, GEOSERVER_WRITE_PERMISSIONS @@ -20,7 +19,7 @@ from cowbird.permissions_synchronizer import Permission from cowbird.request_task import RequestTask from cowbird.typedefs import JSON, SettingsType -from cowbird.utils import CONTENT_TYPE_JSON, get_logger, update_filesystem_permissions +from cowbird.utils import CONTENT_TYPE_JSON, get_logger, apply_default_path_ownership, apply_new_path_permissions GeoserverType: TypeAlias = "Geoserver" # need a reference for the decorator before it gets defined @@ -207,39 +206,6 @@ def get_shapefile_list(self, workspace_name: str, shapefile_name: str) -> List[s base_filename = self._shapefile_folder_dir(workspace_name) + "/" + shapefile_name return [base_filename + ext for ext in SHAPEFILE_ALL_EXTENSIONS] - @staticmethod - def _apply_new_path_permissions(path: str, is_readable: bool, is_writable: bool, is_executable: bool) -> None: - """ - Applies new permissions to a path, if required. - """ - # Only use the 3 last octal digits - previous_perms = os.stat(path)[stat.ST_MODE] & 0o777 - - new_perms = update_filesystem_permissions(previous_perms, - is_readable=is_readable, - is_writable=is_writable, - is_executable=is_executable) - # Only apply chmod if there is an actual change, to avoid looping events between Magpie and Cowbird - if new_perms != previous_perms: - try: - os.chmod(path, new_perms) - except PermissionError as exc: - LOGGER.warning("Failed to change permissions on the %s path: %s", path, exc) - - @staticmethod - def _apply_default_path_ownership(path: str) -> None: - """ - Applies default ownership to a path, if required. - """ - path_stat = os.stat(path) - # Only apply chown if there is an actual change, to avoid looping events between Magpie and Cowbird - if path_stat.st_uid != DEFAULT_ADMIN_UID or path_stat.st_gid != DEFAULT_ADMIN_GID: - try: - # This operation only works as root. - os.chown(path, DEFAULT_ADMIN_UID, DEFAULT_ADMIN_GID) - except PermissionError as exc: - LOGGER.warning("Failed to change ownership of the %s path: %s", path, exc) - def _update_resource_paths_permissions(self, resource_type: str, permission: Permission, @@ -277,8 +243,8 @@ def _update_resource_paths_permissions(self, if path.endswith(tuple(SHAPEFILE_REQUIRED_EXTENSIONS)): LOGGER.warning("%s could not be found and its permissions could not be updated.", path) continue - self._apply_new_path_permissions(path, is_readable, is_writable, is_executable) - self._apply_default_path_ownership(path) + apply_new_path_permissions(path, is_readable, is_writable, is_executable) + apply_default_path_ownership(path) def _update_resource_paths_permissions_recursive(self, resource: JSON, @@ -530,7 +496,7 @@ def _update_magpie_workspace_permissions(self, workspace_name: str) -> None: datastore_dir_path = self._shapefile_folder_dir(workspace_name) # Make sure the directory has the right ownership - self._apply_default_path_ownership(datastore_dir_path) + apply_default_path_ownership(datastore_dir_path) workspace_status = os.stat(datastore_dir_path)[stat.ST_MODE] is_readable = bool(workspace_status & stat.S_IROTH and workspace_status & stat.S_IXOTH) @@ -661,11 +627,11 @@ def _normalize_shapefile_permissions(self, """ for shapefile in self.get_shapefile_list(workspace_name, shapefile_name): if os.path.exists(shapefile): - self._apply_default_path_ownership(shapefile) - self._apply_new_path_permissions(shapefile, - is_readable=is_readable, - is_writable=is_writable, - is_executable=False) + apply_default_path_ownership(shapefile) + apply_new_path_permissions(shapefile, + is_readable=is_readable, + is_writable=is_writable, + is_executable=False) def remove_shapefile(self, workspace_name: str, filename: str) -> None: """ diff --git a/cowbird/handlers/impl/magpie.py b/cowbird/handlers/impl/magpie.py index 6207f4c1..be98bd2d 100644 --- a/cowbird/handlers/impl/magpie.py +++ b/cowbird/handlers/impl/magpie.py @@ -225,6 +225,15 @@ def get_user_list(self) -> List[str]: raise MagpieHttpError(f"Could not find the list of users. HttpError {resp.status_code} : {resp.text}") return resp.json()["user_names"] + def get_user_id_from_user_name(self, user_name: str) -> int: + """ + Finds the id of a user from his username. + """ + resp = self._send_request(method="GET", url=f"{self.url}/users/{user_name}") + if resp.status_code != 200: + raise MagpieHttpError(f"Could not find the user `{user_name}`. HttpError {resp.status_code} : {resp.text}") + return resp.json()["user"]["user_id"] + def get_user_name_from_user_id(self, user_id: int) -> str: """ Finds the name of a user from his user id. @@ -255,6 +264,16 @@ def get_user_permissions_by_res_id(self, user: str, res_id: int, effective: bool f"HttpError {resp.status_code} : {resp.text}") return resp.json() + def get_user_names_by_group_name(self, grp_name: str) -> List[str]: + """ + Returns the list of Magpie usernames from a group. + """ + resp = self._send_request(method="GET", url=f"{self.url}/groups/{grp_name}/users") + if resp.status_code != 200: + raise MagpieHttpError(f"Could not find the list of usernames from group {grp_name}. " + f"HttpError {resp.status_code} : {resp.text}") + return resp.json()["user_names"] + def get_group_permissions(self, grp: str) -> Dict[str, JSON]: """ Gets all group resource permissions. diff --git a/cowbird/utils.py b/cowbird/utils.py index bcdd4fab..553b110f 100644 --- a/cowbird/utils.py +++ b/cowbird/utils.py @@ -28,7 +28,7 @@ from webob.headers import EnvironHeaders, ResponseHeaders from cowbird import __meta__ -from cowbird.constants import get_constant, validate_required +from cowbird.constants import get_constant, validate_required, DEFAULT_ADMIN_GID, DEFAULT_ADMIN_UID from cowbird.typedefs import ( JSON, AnyHeadersType, @@ -588,6 +588,25 @@ def get_timeout(container: Optional[AnySettingsContainer] = None) -> int: raise_missing=False, raise_not_set=False)) +def apply_new_path_permissions(path: str, is_readable: bool, is_writable: bool, is_executable: bool) -> None: + """ + Applies new permissions to a path, if required. + """ + # Only use the 3 last octal digits + previous_perms = os.stat(path)[stat.ST_MODE] & 0o777 + + new_perms = update_filesystem_permissions(previous_perms, + is_readable=is_readable, + is_writable=is_writable, + is_executable=is_executable) + # Only apply chmod if there is an actual change, to avoid looping events between Magpie and Cowbird + if new_perms != previous_perms: + try: + os.chmod(path, new_perms) + except PermissionError as exc: + LOGGER.warning("Failed to change permissions on the %s path: %s", path, exc) + + def update_filesystem_permissions(permission: int, is_readable: bool, is_writable: bool, is_executable: bool) -> int: """ Applies/remove read, write and execute permissions (user only) to the input file system permissions. @@ -598,3 +617,17 @@ def update_filesystem_permissions(permission: int, is_readable: bool, is_writabl # Only use the 3 last octal digits permission = permission & 0o777 return permission + + +def apply_default_path_ownership(path: str) -> None: + """ + Applies default ownership to a path, if required. + """ + path_stat = os.stat(path) + # Only apply chown if there is an actual change, to avoid looping events between Magpie and Cowbird + if path_stat.st_uid != DEFAULT_ADMIN_UID or path_stat.st_gid != DEFAULT_ADMIN_GID: + try: + # This operation only works as root. + os.chown(path, DEFAULT_ADMIN_UID, DEFAULT_ADMIN_GID) + except PermissionError as exc: + LOGGER.warning("Failed to change ownership of the %s path: %s", path, exc) diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 5ab57314..864c2095 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -15,6 +15,7 @@ from magpie.services import ServiceAPI from webtest.app import TestApp +from cowbird.api.schemas import ValidOperations from cowbird.handlers import HandlerFactory from cowbird.handlers.impl.filesystem import NOTEBOOKS_DIR_NAME, SECURE_DATA_PROXY_NAME from cowbird.typedefs import JSON @@ -399,17 +400,21 @@ def setUp(self): self.job_id = 1 self.bird_name = "weaver" - self.output_subpath = f"{self.job_id}/test_output.txt" + self.output_filename = "test_output.txt" + subpath = f"{self.job_id}/{self.output_filename}" self.output_file = os.path.join(self.wpsoutputs_dir, - f"{self.bird_name}/users/{self.user_id}/{self.output_subpath}") + f"{self.bird_name}/users/{self.user_id}/{subpath}") self.wps_outputs_user_dir = filesystem_handler.get_wps_outputs_user_dir(self.test_username) - # Hardlink for user files doesn't use the full subpath, but removes the redundant `users` and `{user_id}` parts. - self.hardlink_path = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.output_subpath}") + self.hardlink_path = filesystem_handler._get_user_hardlink(src_path=self.output_file, + bird_name=self.bird_name, + user_name=self.test_username, + subpath=subpath) # Create the test wps output file os.makedirs(os.path.dirname(self.output_file)) with open(self.output_file, mode="w", encoding="utf-8"): pass + os.chmod(self.output_file, 0o666) self.secure_data_proxy_name = SECURE_DATA_PROXY_NAME # Delete the service if it already exists @@ -429,6 +434,17 @@ def create_secure_data_proxy_service(self): } return test_magpie.create_service(HandlerFactory().get_handler("Magpie"), data) + @staticmethod + def check_path_perms_and_hardlink(src_path: str, hardlink_path: str, perms: int): + """ + Checks if a path has the expected permissions, and if a hardlink exists, depending of the `other` permissions. + """ + utils.check_path_permissions(src_path, perms) + if perms & 0o006: # check if path has a read or write permission set for `other` + assert os.path.exists(hardlink_path) + else: + assert not os.path.exists(hardlink_path) + def test_user_wps_output_created(self): """ Tests creating wps outputs for a user. @@ -575,3 +591,272 @@ def test_resync(self): assert not os.path.exists(new_dir_linked_path) assert not os.path.exists(old_nested_file) assert not os.path.exists(old_subdir) + + def test_permission_updates_user_data(self): + """ + Tests updating permissions on data found directly in a specific user directory. + """ + magpie_handler = HandlerFactory().get_handler("Magpie") + # Create resources for the full route + svc_id = self.create_secure_data_proxy_service() + + # Prepare test files + subdir_test_file = self.output_file + subdir_hardlink = self.hardlink_path + root_test_filename = "other_file.txt" + root_test_file = os.path.join(self.wpsoutputs_dir, + f"{self.bird_name}/users/{self.user_id}/{root_test_filename}") + root_hardlink = HandlerFactory().get_handler("FileSystem")._get_user_hardlink( + src_path=root_test_file, bird_name=self.bird_name, user_name=self.test_username, subpath=root_test_filename) + with open(root_test_file, mode="w", encoding="utf-8"): + pass + for file in [root_test_file, subdir_test_file]: + os.chmod(file, 0o660) + + # Prepare test routes + user_dir_res_id = None + routes = re.sub(rf"^{self.wpsoutputs_dir}", "wpsoutputs", self.output_file).strip("/") + last_res_id = svc_id + for route in routes.split("/"): + last_res_id = magpie_handler.create_resource(route, Route.resource_type_name, last_res_id) + if route == str(self.user_id): + user_dir_res_id = last_res_id + if not user_dir_res_id: + raise ValueError("Missing resource id for the user directory, invalid test.") + subdir_file_res_id = last_res_id + magpie_handler.create_resource(root_test_filename, Route.resource_type_name, user_dir_res_id) + + data = { + "event": ValidOperations.CreateOperation.value, + "service_name": None, + "service_type": ServiceAPI.service_type, + "resource_id": user_dir_res_id, + "resource_full_name": "test-full-name", + "name": Permission.READ.value, + "access": Access.ALLOW.value, + "scope": Scope.MATCH.value, + "user": self.test_username, + "group": None + } + magpie_handler.create_permission_by_res_id(data["resource_id"], data["name"], data["access"], + data["scope"], data["user"]) + # Files should still have no permissions since dir permission is match-only. + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + self.check_path_perms_and_hardlink(root_test_file, root_hardlink, 0o660) + self.check_path_perms_and_hardlink(subdir_test_file, subdir_hardlink, 0o660) + + # File permissions should be updated with the recursive permission on the parent directory. + data["scope"] = Scope.RECURSIVE.value + magpie_handler.create_permission_by_res_id(data["resource_id"], data["name"], data["access"], + data["scope"], data["user"]) + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + self.check_path_perms_and_hardlink(root_test_file, root_hardlink, 0o664) + self.check_path_perms_and_hardlink(subdir_test_file, subdir_hardlink, 0o664) + + # Permission applied directly to a file + data["resource_id"] = subdir_file_res_id + data["name"] = Permission.WRITE.value + data["scope"] = Scope.MATCH.value + magpie_handler.create_permission_by_res_id(data["resource_id"], data["name"], data["access"], + data["scope"], data["user"]) + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + self.check_path_perms_and_hardlink(root_test_file, root_hardlink, 0o664) + self.check_path_perms_and_hardlink(subdir_test_file, subdir_hardlink, 0o666) + + # Test deleting permission on a directory + data["event"] = ValidOperations.DeleteOperation.value + data["resource_id"] = user_dir_res_id + data["name"] = Permission.READ.value + magpie_handler.delete_permission_by_user_and_res_id(data["user"], data["resource_id"], data["name"]) + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + self.check_path_perms_and_hardlink(root_test_file, root_hardlink, 0o660) + self.check_path_perms_and_hardlink(subdir_test_file, subdir_hardlink, 0o662) + + # Test deleting permission on a file + data["resource_id"] = subdir_file_res_id + data["name"] = Permission.WRITE.value + magpie_handler.delete_permission_by_user_and_res_id(data["user"], data["resource_id"], data["name"]) + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + self.check_path_perms_and_hardlink(root_test_file, root_hardlink, 0o660) + self.check_path_perms_and_hardlink(subdir_test_file, subdir_hardlink, 0o660) + + def test_permission_updates_wpsoutputs_data(self): + """ + Tests updating permissions on data found outside of the user directories, including testing permissions + on a user and on a group. + """ + filesystem_handler = HandlerFactory().get_handler("FileSystem") + magpie_handler = HandlerFactory().get_handler("Magpie") + # Create resources for the full route + svc_id = self.create_secure_data_proxy_service() + + # Create other user from a group different than the test group + test_magpie.delete_group(magpie_handler, "others") + test_magpie.create_group(magpie_handler, "others", "", True, "") + ignored_username = "ignored-user" + test_magpie.delete_user(magpie_handler, ignored_username) + ignored_user_id = test_magpie.create_user(magpie_handler, ignored_username, + "ignored@test.com", "qwertyqwerty", "others") + filesystem_handler.user_created(ignored_username) + + # Create other user from the same group as the original test user + same_group_username = "same-group-user" + test_magpie.delete_user(magpie_handler, same_group_username) + same_group_user_id = test_magpie.create_user(magpie_handler, same_group_username, + "samegroup@test.com", "qwertyqwerty", "users") + filesystem_handler.user_created(same_group_username) + + # Create other test files + # Public files should be ignored by following test cases, + # since public files are not concerned by perm change events. + public_file = os.path.join(self.wpsoutputs_dir, "public_file.txt") + public_subfile = os.path.join(self.wpsoutputs_dir, "public_dir/public_file.txt") + + # This file should be ignored by following test cases, being in a group different than the test group. + ignored_filename = "ignored.txt" + ignored_file = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{ignored_user_id}/{ignored_filename}") + ignored_hardlink = filesystem_handler._get_user_hardlink( + src_path=ignored_file, bird_name=self.bird_name, user_name=ignored_username, subpath=ignored_filename) + + same_group_filename = "same_group.txt" + same_group_file = os.path.join(self.wpsoutputs_dir, + f"{self.bird_name}/users/{same_group_user_id}/{same_group_filename}") + same_group_hardlink = filesystem_handler._get_user_hardlink(src_path=same_group_file, + bird_name=self.bird_name, + user_name=same_group_username, + subpath=same_group_filename) + + for file in [self.output_file, public_file, public_subfile, ignored_file, same_group_file]: + os.makedirs(os.path.dirname(file), exist_ok=True) + with open(file, mode="w", encoding="utf-8"): + pass + os.chmod(file, 0o660) + + # Create resource + res_id = magpie_handler.create_resource("wpsoutputs", Route.resource_type_name, svc_id) + + data = { + "event": ValidOperations.CreateOperation.value, + "service_name": None, + "service_type": ServiceAPI.service_type, + "resource_id": res_id, + "resource_full_name": "test-full-name", + "name": Permission.READ.value, + "access": Access.ALLOW.value, + "scope": Scope.RECURSIVE.value, + "user": self.test_username, + "group": None + } + magpie_handler.create_permission_by_res_id(data["resource_id"], data["name"], data["access"], data["scope"], + user_name=data["user"]) + # Check that perms was only updated for concerned user files + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o664) + self.check_path_perms_and_hardlink(ignored_file, ignored_hardlink, 0o660) + self.check_path_perms_and_hardlink(same_group_file, same_group_hardlink, 0o660) + utils.check_path_permissions(public_file, 0o660) + utils.check_path_permissions(public_subfile, 0o660) + + # Check that perms are updated for all the users of the concerned group + data["user"] = None + data["group"] = "users" + data["name"] = Permission.WRITE.value + magpie_handler.create_permission_by_res_id(data["resource_id"], data["name"], data["access"], data["scope"], + grp_name=data["group"]) + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o666) + self.check_path_perms_and_hardlink(ignored_file, ignored_hardlink, 0o660) + self.check_path_perms_and_hardlink(same_group_file, same_group_hardlink, 0o662) + utils.check_path_permissions(public_file, 0o660) + utils.check_path_permissions(public_subfile, 0o660) + + # Add read-deny group permissions for next test cases + data["name"] = Permission.READ.value + data["access"] = Access.DENY.value + magpie_handler.create_permission_by_res_id(data["resource_id"], data["name"], data["access"], data["scope"], + grp_name=data["group"]) + utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o666) + + # Test deleting a specific user permission + data["event"] = ValidOperations.DeleteOperation.value + data["user"] = self.test_username + data["group"] = None + magpie_handler.delete_permission_by_user_and_res_id(data["user"], data["resource_id"], data["name"]) + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o662) + self.check_path_perms_and_hardlink(ignored_file, ignored_hardlink, 0o660) + self.check_path_perms_and_hardlink(same_group_file, same_group_hardlink, 0o662) + utils.check_path_permissions(public_file, 0o660) + utils.check_path_permissions(public_subfile, 0o660) + + # Test deleting a group permission + data["name"] = Permission.WRITE.value + data["user"] = None + data["group"] = "users" + magpie_handler.delete_permission_by_grp_and_res_id(data["group"], data["resource_id"], data["name"]) + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o660) + self.check_path_perms_and_hardlink(ignored_file, ignored_hardlink, 0o660) + self.check_path_perms_and_hardlink(same_group_file, same_group_hardlink, 0o660) + utils.check_path_permissions(public_file, 0o660) + utils.check_path_permissions(public_subfile, 0o660) + + def test_permission_updates_other_svc(self): + """ + Tests permission updates on a wpsoutputs resource from a service other than the secure-data-proxy, which + should not be processed by the filesystem handler. + """ + magpie_handler = HandlerFactory().get_handler("Magpie") + # Create resources for the full route + self.create_secure_data_proxy_service() + data = { + "service_name": "other_service", + "service_type": ServiceAPI.service_type, + "service_sync_type": ServiceAPI.service_type, + "service_url": f"http://localhost:9000/other_service", + "configuration": {} + } + test_magpie.delete_service(magpie_handler, "other_service") + other_svc_id = test_magpie.create_service(magpie_handler, data) + + # Create resource associated with the test file, on the other service + last_res_id = other_svc_id + routes = re.sub(rf"^{self.wpsoutputs_dir}", "", self.output_file).strip("/") + for route in routes.split("/"): + last_res_id = magpie_handler.create_resource(route, Route.resource_type_name, last_res_id) + + data = { + "event": ValidOperations.DeleteOperation.value, + "service_name": None, + "service_type": ServiceAPI.service_type, + "resource_id": last_res_id, + "resource_full_name": routes, + "name": Permission.READ.value, + "access": Access.ALLOW.value, + "scope": Scope.MATCH.value, + "user": self.test_username, + "group": None + } + # Check that a delete event on the resource of the other service does not modify the file permissions. + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + utils.check_path_permissions(self.output_file, 0o666) + + # Check that a create event on the resource of the other service does not modify the file permissions. + data["event"] = ValidOperations.CreateOperation.value + data["access"] = Access.DENY.value + magpie_handler.create_permission_by_res_id(data["resource_id"], data["name"], data["access"], + data["scope"], data["user"]) + resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) + assert resp.status_code == 200 + utils.check_path_permissions(self.output_file, 0o666) diff --git a/tests/test_magpie.py b/tests/test_magpie.py index afab0faa..051622fe 100644 --- a/tests/test_magpie.py +++ b/tests/test_magpie.py @@ -34,6 +34,23 @@ def delete_user(magpie: Magpie, user_name: str) -> None: assert resp.status_code in [200, 404] +def create_group(magpie: Magpie, group_name: str, descr: str, discoverable: bool, terms: str) -> int: + resp = magpie._send_request(method="POST", url=f"{magpie.url}/groups", + json={ + "group_name": group_name, + "description": descr, + "discoverable": discoverable, + "terms": terms + }) + assert resp.status_code == 201 + return resp.json()["group"]["group_id"] + + +def delete_group(magpie: Magpie, group_name: str) -> None: + resp = magpie._send_request(method="DELETE", url=f"{magpie.url}/groups/{group_name}") + assert resp.status_code in [200, 404] + + def create_service(magpie: Magpie, service_data: Dict[str, str]) -> int: resp = magpie._send_request(method="POST", url=f"{magpie.url}/services", json=service_data) assert resp.status_code == 201 From 80dc64f74c1254089b51db3b4c29428d593f62c0 Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Mon, 7 Aug 2023 15:46:09 -0400 Subject: [PATCH 06/17] fix lint --- cowbird/handlers/impl/filesystem.py | 32 ++++++++++++++--------------- cowbird/handlers/impl/geoserver.py | 2 +- cowbird/utils.py | 2 +- tests/test_filesystem.py | 32 ++++++++++++++--------------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index fa0b0b26..f038c241 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -2,7 +2,7 @@ import re import shutil from pathlib import Path -from typing import Any, Tuple, cast, List +from typing import Any, List, Tuple, cast from magpie.permissions import Access from magpie.permissions import Permission as MagpiePermission @@ -14,7 +14,7 @@ from cowbird.monitoring.monitoring import Monitoring from cowbird.permissions_synchronizer import Permission from cowbird.typedefs import JSON, SettingsType -from cowbird.utils import get_logger, apply_new_path_permissions +from cowbird.utils import apply_new_path_permissions, get_logger LOGGER = get_logger(__name__) @@ -136,7 +136,7 @@ def _get_public_hardlink(self, src_path: str) -> str: subpath = os.path.relpath(src_path, self.wps_outputs_dir) return os.path.join(self.get_wps_outputs_public_dir(), subpath) - def _get_user_hardlink(self, src_path: str, bird_name: str, user_name: str, subpath: str) -> str: + def get_user_hardlink(self, src_path: str, bird_name: str, user_name: str, subpath: str) -> str: user_workspace_dir = self.get_user_workspace_dir(user_name) if not os.path.exists(user_workspace_dir): raise FileNotFoundError(f"User {user_name} workspace not found at path {user_workspace_dir}. New " @@ -229,10 +229,10 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, magpie_handler = HandlerFactory().get_handler("Magpie") user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) - hardlink_path = self._get_user_hardlink(src_path=src_path, - bird_name=regex_match.group(1), - user_name=user_name, - subpath=regex_match.group(3)) + hardlink_path = self.get_user_hardlink(src_path=src_path, + bird_name=regex_match.group(1), + user_name=user_name, + subpath=regex_match.group(3)) api_services = magpie_handler.get_services_by_type(ServiceAPI.service_type) if SECURE_DATA_PROXY_NAME not in api_services: LOGGER.debug("`%s` service not found. Considering user wpsoutputs data as accessible by default.", @@ -289,10 +289,10 @@ def _delete_wpsoutputs_hardlink(self, src_path: str, return False magpie_handler = HandlerFactory().get_handler("Magpie") user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) - linked_path = self._get_user_hardlink(src_path=src_path, - bird_name=regex_match.group(1), - user_name=user_name, - subpath=regex_match.group(3)) + linked_path = self.get_user_hardlink(src_path=src_path, + bird_name=regex_match.group(1), + user_name=user_name, + subpath=regex_match.group(3)) else: # public paths if not process_public_files: return False @@ -338,7 +338,7 @@ def _update_permissions_on_filesystem(self, permission: Permission) -> None: full_route = self.wps_outputs_dir # Add subpath if the resource is a child of the main wpsoutputs resource if len(res_tree) > 2: - child_route = "/".join([res["resource_name"] for res in res_tree[2:]]) + child_route = "/".join(cast(List[str], [res["resource_name"] for res in res_tree[2:]])) full_route = os.path.join(full_route, child_route) # Find all users related to the permission @@ -370,10 +370,10 @@ def _update_permissions_on_filesystem(self, permission: Permission) -> None: user_name = users[int(path_regex_match.group(2))] access_allowed = self.update_secure_data_proxy_path_perms(user_path, user_name) try: - hardlink_path = self._get_user_hardlink(src_path=user_path, - bird_name=path_regex_match.group(1), - user_name=user_name, - subpath=path_regex_match.group(3)) + hardlink_path = self.get_user_hardlink(src_path=user_path, + bird_name=path_regex_match.group(1), + user_name=user_name, + subpath=path_regex_match.group(3)) except FileNotFoundError: LOGGER.debug("Failed to find a hardlink path corresponding to the source path [%s]. The user `%s` " "should already have an existing workspace.", diff --git a/cowbird/handlers/impl/geoserver.py b/cowbird/handlers/impl/geoserver.py index 1d619b63..6187eaa2 100644 --- a/cowbird/handlers/impl/geoserver.py +++ b/cowbird/handlers/impl/geoserver.py @@ -19,7 +19,7 @@ from cowbird.permissions_synchronizer import Permission from cowbird.request_task import RequestTask from cowbird.typedefs import JSON, SettingsType -from cowbird.utils import CONTENT_TYPE_JSON, get_logger, apply_default_path_ownership, apply_new_path_permissions +from cowbird.utils import CONTENT_TYPE_JSON, apply_default_path_ownership, apply_new_path_permissions, get_logger GeoserverType: TypeAlias = "Geoserver" # need a reference for the decorator before it gets defined diff --git a/cowbird/utils.py b/cowbird/utils.py index 553b110f..7536fe9c 100644 --- a/cowbird/utils.py +++ b/cowbird/utils.py @@ -28,7 +28,7 @@ from webob.headers import EnvironHeaders, ResponseHeaders from cowbird import __meta__ -from cowbird.constants import get_constant, validate_required, DEFAULT_ADMIN_GID, DEFAULT_ADMIN_UID +from cowbird.constants import DEFAULT_ADMIN_GID, DEFAULT_ADMIN_UID, get_constant, validate_required from cowbird.typedefs import ( JSON, AnyHeadersType, diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 864c2095..cd27a726 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -405,10 +405,10 @@ def setUp(self): self.output_file = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{subpath}") self.wps_outputs_user_dir = filesystem_handler.get_wps_outputs_user_dir(self.test_username) - self.hardlink_path = filesystem_handler._get_user_hardlink(src_path=self.output_file, - bird_name=self.bird_name, - user_name=self.test_username, - subpath=subpath) + self.hardlink_path = filesystem_handler.get_user_hardlink(src_path=self.output_file, + bird_name=self.bird_name, + user_name=self.test_username, + subpath=subpath) # Create the test wps output file os.makedirs(os.path.dirname(self.output_file)) @@ -437,7 +437,7 @@ def create_secure_data_proxy_service(self): @staticmethod def check_path_perms_and_hardlink(src_path: str, hardlink_path: str, perms: int): """ - Checks if a path has the expected permissions, and if a hardlink exists, depending of the `other` permissions. + Checks if a path has the expected permissions, and if a hardlink exists, according to the `other` permissions. """ utils.check_path_permissions(src_path, perms) if perms & 0o006: # check if path has a read or write permission set for `other` @@ -606,7 +606,7 @@ def test_permission_updates_user_data(self): root_test_filename = "other_file.txt" root_test_file = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{root_test_filename}") - root_hardlink = HandlerFactory().get_handler("FileSystem")._get_user_hardlink( + root_hardlink = HandlerFactory().get_handler("FileSystem").get_user_hardlink( src_path=root_test_file, bird_name=self.bird_name, user_name=self.test_username, subpath=root_test_filename) with open(root_test_file, mode="w", encoding="utf-8"): pass @@ -687,8 +687,8 @@ def test_permission_updates_user_data(self): def test_permission_updates_wpsoutputs_data(self): """ - Tests updating permissions on data found outside of the user directories, including testing permissions - on a user and on a group. + Tests updating permissions on data found outside of the user directories, including testing permissions on a + user and on a group. """ filesystem_handler = HandlerFactory().get_handler("FileSystem") magpie_handler = HandlerFactory().get_handler("Magpie") @@ -720,16 +720,16 @@ def test_permission_updates_wpsoutputs_data(self): # This file should be ignored by following test cases, being in a group different than the test group. ignored_filename = "ignored.txt" ignored_file = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{ignored_user_id}/{ignored_filename}") - ignored_hardlink = filesystem_handler._get_user_hardlink( + ignored_hardlink = filesystem_handler.get_user_hardlink( src_path=ignored_file, bird_name=self.bird_name, user_name=ignored_username, subpath=ignored_filename) same_group_filename = "same_group.txt" same_group_file = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{same_group_user_id}/{same_group_filename}") - same_group_hardlink = filesystem_handler._get_user_hardlink(src_path=same_group_file, - bird_name=self.bird_name, - user_name=same_group_username, - subpath=same_group_filename) + same_group_hardlink = filesystem_handler.get_user_hardlink(src_path=same_group_file, + bird_name=self.bird_name, + user_name=same_group_username, + subpath=same_group_filename) for file in [self.output_file, public_file, public_subfile, ignored_file, same_group_file]: os.makedirs(os.path.dirname(file), exist_ok=True) @@ -813,8 +813,8 @@ def test_permission_updates_wpsoutputs_data(self): def test_permission_updates_other_svc(self): """ - Tests permission updates on a wpsoutputs resource from a service other than the secure-data-proxy, which - should not be processed by the filesystem handler. + Tests permission updates on a wpsoutputs resource from a service other than the secure-data-proxy, which should + not be processed by the filesystem handler. """ magpie_handler = HandlerFactory().get_handler("Magpie") # Create resources for the full route @@ -823,7 +823,7 @@ def test_permission_updates_other_svc(self): "service_name": "other_service", "service_type": ServiceAPI.service_type, "service_sync_type": ServiceAPI.service_type, - "service_url": f"http://localhost:9000/other_service", + "service_url": "http://localhost:9000/other_service", "configuration": {} } test_magpie.delete_service(magpie_handler, "other_service") From 5e539b72b8d00576d956a29a3aeb42a1ae912bcf Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Tue, 8 Aug 2023 10:45:56 -0400 Subject: [PATCH 07/17] add comment about unsupported on_modified case --- cowbird/handlers/impl/filesystem.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index f038c241..e807046a 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -273,7 +273,10 @@ def on_modified(self, path: str) -> None: :param path: Absolute path of a new file/directory """ - # Nothing to do for files in the wps_outputs folder, since hardlinks are updated automatically. + # Nothing to do for files in the wps_outputs folder. + # Permission modifications (e.g.: via `chmod`) are not supported to simplify the management of wpsoutputs perms. + # Any permission modifications should be done via Magpie, which will synchronize the permissions on any related + # hardlinks automatically. def _delete_wpsoutputs_hardlink(self, src_path: str, process_user_files: bool = True, process_public_files: bool = True) -> bool: From beb9ffa63ee7a8f66278ed3eb1775fd5701acfc9 Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Tue, 8 Aug 2023 11:04:42 -0400 Subject: [PATCH 08/17] add doc about user wps outputs files --- docs/components.rst | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/docs/components.rst b/docs/components.rst index dee043fd..6d7b610e 100644 --- a/docs/components.rst +++ b/docs/components.rst @@ -63,8 +63,20 @@ user workspaces location (e.g.:``/user_workspaces/public/wpsoutputs``). When a ` `birdhouse`, the folder containing the hardlinks will be mounted as volume, so that the users have access via their `JupyterLab` instance. The volume will be made read-only to prevent a user from modifying the public data. -.. - TODO: add section on user data when implemented +The user wps outputs data is made accessible by generating hardlinks from a wps outputs data directory containing user +data to a subdirectory found in the related user's workspace. For example, with a source path +``/wpsoutputs//users///``, a hardlink is generated at the path +``/user_workspaces//wpsoutputs///``. The hardlink path uses a similar +structure as found at the source path, but removes the redundant ``users`` and ```` path segments. The +hardlink files will be automatically available to the user on a `JupyterLab` instance since the workspace is mounted as +a volume. Any file that is found under a directory ``/wpsoutputs//users//`` is considered to be +user data and any file outside is considered public. + +The permissions found on the files are synchronized with the permissions found on `Magpie`. If `Magpie` uses a +`secure-data-proxy` service, this service handles the permissions of those files. If a file does not have a +corresponding route on the `secure-data-proxy` service, it will use the closest parent permissions. If no such service +is found, the user files are assumed to be fully available with read and write permissions for the user. Note that if +the file does not have any read or write permissions, the hardlink will not be available in the user's workspace. Note that different design choices were made to respect the constraints of the file system and to prevent the user from accessing forbidden data: From d64732330fe681f8a794f8e2671ec0fa48271ad3 Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Tue, 8 Aug 2023 12:54:35 -0400 Subject: [PATCH 09/17] set user dirs perms to allow read and write operations on jupyterlab --- cowbird/handlers/impl/filesystem.py | 15 +++++++++++---- tests/test_filesystem.py | 11 ++++++++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index e807046a..d94b3530 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -204,12 +204,15 @@ def update_secure_data_proxy_path_perms(self, src_path: str, user_name: str) -> return access_allowed @staticmethod - def create_hardlink_path(src_path: str, hardlink_path: str, access_allowed: bool) -> None: + def create_hardlink_path(src_path: str, hardlink_path: str, access_allowed: bool, dir_perms: int = 0o775) -> None: """ Creates a hardlink path from a source file, if the user has access rights. """ if access_allowed: - os.makedirs(os.path.dirname(hardlink_path), exist_ok=True) + # set umask to allow all permissions, else, mode is ignored when making directories + previous_umask = os.umask(0) + os.makedirs(os.path.dirname(hardlink_path), mode=dir_perms, exist_ok=True) + os.umask(previous_umask) # reset umask to default LOGGER.debug("Creating hardlink from file `%s` to the path `%s`", src_path, hardlink_path) try: os.link(src_path, hardlink_path) @@ -227,6 +230,9 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, if not process_user_files: return + # User workspace directories require `rwx` permissions to allow file modifications via JupyterLab for + # files with write permissions + dir_perms = 0o777 magpie_handler = HandlerFactory().get_handler("Magpie") user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) hardlink_path = self.get_user_hardlink(src_path=src_path, @@ -243,6 +249,7 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, else: # public files if not process_public_files: return + dir_perms = 0o775 hardlink_path = self._get_public_hardlink(src_path) if os.path.exists(hardlink_path): @@ -254,7 +261,7 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, "created file.", hardlink_path) os.remove(hardlink_path) - self.create_hardlink_path(src_path, hardlink_path, access_allowed) + self.create_hardlink_path(src_path, hardlink_path, access_allowed, dir_perms=dir_perms) def on_created(self, path: str) -> None: """ @@ -386,7 +393,7 @@ def _update_permissions_on_filesystem(self, permission: Permission) -> None: # Resync hardlink path if os.path.exists(hardlink_path): os.remove(hardlink_path) - self.create_hardlink_path(user_path, hardlink_path, access_allowed) + self.create_hardlink_path(user_path, hardlink_path, access_allowed, dir_perms=0o777) def permission_created(self, permission: Permission) -> None: self._update_permissions_on_filesystem(permission) diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index cd27a726..2247d331 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -17,7 +17,7 @@ from cowbird.api.schemas import ValidOperations from cowbird.handlers import HandlerFactory -from cowbird.handlers.impl.filesystem import NOTEBOOKS_DIR_NAME, SECURE_DATA_PROXY_NAME +from cowbird.handlers.impl.filesystem import NOTEBOOKS_DIR_NAME, SECURE_DATA_PROXY_NAME, USER_WPS_OUTPUTS_USER_DIR_NAME from cowbird.typedefs import JSON from tests import test_magpie, utils @@ -230,6 +230,9 @@ def test_public_wps_output_created(self): TestFileSystem.check_created_test_cases(output_file, hardlink_path) + # Check that the hardlink's parent directory have the right permissions + utils.check_path_permissions(os.path.join(filesystem_handler.get_wps_outputs_public_dir(), bird_name), 0o775) + # A create event on a folder should not be processed (no corresponding target folder created) target_dir = os.path.join(filesystem_handler.get_wps_outputs_public_dir(), bird_name) shutil.rmtree(target_dir) @@ -461,6 +464,12 @@ def test_user_wps_output_created(self): TestFileSystem.check_created_test_cases(self.output_file, self.hardlink_path) + # Check that the hardlink's parent directories have the right permissions + path_to_check = self.hardlink_path + while not path_to_check.endswith(USER_WPS_OUTPUTS_USER_DIR_NAME): + path_to_check = os.path.dirname(path_to_check) + utils.check_path_permissions(path_to_check, 0o777) + # A create event on a folder should not be processed (no corresponding target folder created) src_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{self.job_id}") target_dir = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.job_id}") From d84271ef2dd9723851ca528a733f5ca7e2bd3468 Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Tue, 8 Aug 2023 13:08:41 -0400 Subject: [PATCH 10/17] only set the first parent dir as writable for user data, which is the minimum requirement --- cowbird/handlers/impl/filesystem.py | 22 ++++++++++++---------- tests/test_filesystem.py | 9 +++------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index d94b3530..a27c89de 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -204,15 +204,17 @@ def update_secure_data_proxy_path_perms(self, src_path: str, user_name: str) -> return access_allowed @staticmethod - def create_hardlink_path(src_path: str, hardlink_path: str, access_allowed: bool, dir_perms: int = 0o775) -> None: + def create_hardlink_path(src_path: str, hardlink_path: str, access_allowed: bool, + is_parent_dir_writable: bool = False) -> None: """ Creates a hardlink path from a source file, if the user has access rights. """ if access_allowed: - # set umask to allow all permissions, else, mode is ignored when making directories - previous_umask = os.umask(0) - os.makedirs(os.path.dirname(hardlink_path), mode=dir_perms, exist_ok=True) - os.umask(previous_umask) # reset umask to default + os.makedirs(os.path.dirname(hardlink_path), exist_ok=True) + apply_new_path_permissions(os.path.dirname(hardlink_path), + is_readable=True, + is_writable=is_parent_dir_writable, + is_executable=True) LOGGER.debug("Creating hardlink from file `%s` to the path `%s`", src_path, hardlink_path) try: os.link(src_path, hardlink_path) @@ -230,9 +232,9 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, if not process_user_files: return - # User workspace directories require `rwx` permissions to allow file modifications via JupyterLab for + # User workspace directories require write permissions to allow file modifications via JupyterLab for # files with write permissions - dir_perms = 0o777 + is_parent_dir_writable = True magpie_handler = HandlerFactory().get_handler("Magpie") user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) hardlink_path = self.get_user_hardlink(src_path=src_path, @@ -249,7 +251,7 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, else: # public files if not process_public_files: return - dir_perms = 0o775 + is_parent_dir_writable = False # public files are read-only hardlink_path = self._get_public_hardlink(src_path) if os.path.exists(hardlink_path): @@ -261,7 +263,7 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, "created file.", hardlink_path) os.remove(hardlink_path) - self.create_hardlink_path(src_path, hardlink_path, access_allowed, dir_perms=dir_perms) + self.create_hardlink_path(src_path, hardlink_path, access_allowed, is_parent_dir_writable) def on_created(self, path: str) -> None: """ @@ -393,7 +395,7 @@ def _update_permissions_on_filesystem(self, permission: Permission) -> None: # Resync hardlink path if os.path.exists(hardlink_path): os.remove(hardlink_path) - self.create_hardlink_path(user_path, hardlink_path, access_allowed, dir_perms=0o777) + self.create_hardlink_path(user_path, hardlink_path, access_allowed, is_parent_dir_writable=True) def permission_created(self, permission: Permission) -> None: self._update_permissions_on_filesystem(permission) diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 2247d331..05bd0ad4 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -231,7 +231,7 @@ def test_public_wps_output_created(self): TestFileSystem.check_created_test_cases(output_file, hardlink_path) # Check that the hardlink's parent directory have the right permissions - utils.check_path_permissions(os.path.join(filesystem_handler.get_wps_outputs_public_dir(), bird_name), 0o775) + utils.check_path_permissions(os.path.dirname(hardlink_path), 0o775) # A create event on a folder should not be processed (no corresponding target folder created) target_dir = os.path.join(filesystem_handler.get_wps_outputs_public_dir(), bird_name) @@ -464,11 +464,8 @@ def test_user_wps_output_created(self): TestFileSystem.check_created_test_cases(self.output_file, self.hardlink_path) - # Check that the hardlink's parent directories have the right permissions - path_to_check = self.hardlink_path - while not path_to_check.endswith(USER_WPS_OUTPUTS_USER_DIR_NAME): - path_to_check = os.path.dirname(path_to_check) - utils.check_path_permissions(path_to_check, 0o777) + # Check that the hardlink's parent directory has the right permissions + utils.check_path_permissions(os.path.dirname(self.hardlink_path), 0o777) # A create event on a folder should not be processed (no corresponding target folder created) src_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{self.job_id}") From 621b0b2bf5801d14d366f125fe778f90cdeec156 Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Tue, 8 Aug 2023 14:33:28 -0400 Subject: [PATCH 11/17] update changelog --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index de5e56ae..e81ffc40 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,7 +10,7 @@ Changes Features / Changes ~~~~~~~~~~~~~~~~~~~~~ * Add monitoring to the ``FileSystem`` handler to watch wps outputs data. -* Synchronize public wps outputs data to the user workspaces folder with hardlinks for user access. +* Synchronize both public and user wps outputs data to the workspace folder with hardlinks for user access. * Add resync endpoint to trigger a handler's resync operations. Only the ``FileSystem`` handler is implemented for now, regenerating hardlinks associated to wps outputs public data. From 428e847806a5ede6b2c2a19d37bb18ad56e2d4ff Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Tue, 8 Aug 2023 15:21:11 -0400 Subject: [PATCH 12/17] review some comments code --- cowbird/handlers/impl/filesystem.py | 37 +++++++++++++++++------------ cowbird/handlers/impl/magpie.py | 6 ++--- docs/components.rst | 4 ++-- tests/test_filesystem.py | 2 +- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index a27c89de..a46ccf9a 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -139,23 +139,23 @@ def _get_public_hardlink(self, src_path: str) -> str: def get_user_hardlink(self, src_path: str, bird_name: str, user_name: str, subpath: str) -> str: user_workspace_dir = self.get_user_workspace_dir(user_name) if not os.path.exists(user_workspace_dir): - raise FileNotFoundError(f"User {user_name} workspace not found at path {user_workspace_dir}. New " - f"wpsoutput {src_path} not added to the user workspace.") - + raise FileNotFoundError(f"User `{user_name}` workspace not found at path [{user_workspace_dir}]. Failed to " + f"find a hardlink path for the wpsoutput [{src_path}] source path.") + # TODO: review subpath = os.path.join(bird_name, subpath) return os.path.join(self.get_wps_outputs_user_dir(user_name), subpath) def _get_secure_data_proxy_file_perms(self, src_path: str, user_name: str) -> Tuple[bool, bool]: """ - Finds a route from the `secure-data-proxy service` that matches the resource path (or one its parent resource) - and gets the user permissions on that route. + Finds a route from the `secure-data-proxy` service that matches the resource path (or one of its parent + resource) and gets the user permissions on that route. """ magpie_handler = HandlerFactory().get_handler("Magpie") sdp_svc_info = magpie_handler.get_service_info("secure-data-proxy") # Find the closest related route resource expected_route = re.sub(rf"^{self.wps_outputs_dir}", "wpsoutputs", src_path) - # Finds the resource id of the route matching the resource or the closest matching parent route. + # Finds the resource id of the route or the closest matching parent route. closest_res_id = None resource = magpie_handler.get_resource(cast(int, sdp_svc_info["resource_id"])) for segment in expected_route.split("/"): @@ -170,7 +170,7 @@ def _get_secure_data_proxy_file_perms(self, src_path: str, user_name: str) -> Tu closest_res_id = child_res_id if not closest_res_id: - # No resource was found to be corresponding to even some part of the expected route. + # No resource corresponds to the expected route or one of its parent route. # Assume access is not allowed. is_readable = False is_writable = False @@ -195,6 +195,8 @@ def update_secure_data_proxy_path_perms(self, src_path: str, user_name: str) -> Returns a boolean to indicate if the user should have some type of access to the path or not. """ is_readable, is_writable = self._get_secure_data_proxy_file_perms(src_path, user_name) + + # Files do not require the `executable` permission. apply_new_path_permissions(src_path, is_readable, is_writable, is_executable=False) access_allowed = True @@ -211,10 +213,14 @@ def create_hardlink_path(src_path: str, hardlink_path: str, access_allowed: bool """ if access_allowed: os.makedirs(os.path.dirname(hardlink_path), exist_ok=True) + + # Set custom write permissions to the parent directory, since they are required for write permissive files + # in JupyterLab. apply_new_path_permissions(os.path.dirname(hardlink_path), is_readable=True, is_writable=is_parent_dir_writable, is_executable=True) + LOGGER.debug("Creating hardlink from file `%s` to the path `%s`", src_path, hardlink_path) try: os.link(src_path, hardlink_path) @@ -235,6 +241,7 @@ def _create_wpsoutputs_hardlink(self, src_path: str, overwrite: bool = False, # User workspace directories require write permissions to allow file modifications via JupyterLab for # files with write permissions is_parent_dir_writable = True + magpie_handler = HandlerFactory().get_handler("Magpie") user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) hardlink_path = self.get_user_hardlink(src_path=src_path, @@ -283,12 +290,12 @@ def on_modified(self, path: str) -> None: :param path: Absolute path of a new file/directory """ # Nothing to do for files in the wps_outputs folder. - # Permission modifications (e.g.: via `chmod`) are not supported to simplify the management of wpsoutputs perms. - # Any permission modifications should be done via Magpie, which will synchronize the permissions on any related - # hardlinks automatically. + # Permission modifications (e.g.: via `chmod`) are not supported to simplify the management of wpsoutputs + # permissions. Any permission modifications should be done via Magpie, which will synchronize the permissions on + # any related hardlinks automatically. def _delete_wpsoutputs_hardlink(self, src_path: str, - process_user_files: bool = True, process_public_files: bool = True) -> bool: + process_user_paths: bool = True, process_public_paths: bool = True) -> bool: """ Deletes the hardlink path that corresponds to the input source path. @@ -297,7 +304,7 @@ def _delete_wpsoutputs_hardlink(self, src_path: str, regex_match = re.search(self.wps_outputs_user_data_regex, src_path) try: if regex_match: # user paths - if not process_user_files: + if not process_user_paths: return False magpie_handler = HandlerFactory().get_handler("Magpie") user_name = magpie_handler.get_user_name_from_user_id(int(regex_match.group(2))) @@ -306,7 +313,7 @@ def _delete_wpsoutputs_hardlink(self, src_path: str, user_name=user_name, subpath=regex_match.group(3)) else: # public paths - if not process_public_files: + if not process_public_paths: return False linked_path = self._get_public_hardlink(src_path) if os.path.isdir(linked_path): @@ -331,7 +338,7 @@ def on_deleted(self, path: str) -> None: @staticmethod def _check_if_res_from_secure_data_proxy(res_tree: List[JSON]) -> bool: """ - Checks if the resource if part of a secure-data-proxy service of type API. + Checks if the resource if part of a `secure-data-proxy` service of type API. """ root_res_info = res_tree[0] if root_res_info["resource_name"] == SECURE_DATA_PROXY_NAME: @@ -349,7 +356,7 @@ def _update_permissions_on_filesystem(self, permission: Permission) -> None: if self._check_if_res_from_secure_data_proxy(res_tree): full_route = self.wps_outputs_dir # Add subpath if the resource is a child of the main wpsoutputs resource - if len(res_tree) > 2: + if len(res_tree) > 2: # /secure-data-proxy/wpsoutputs/ child_route = "/".join(cast(List[str], [res["resource_name"] for res in res_tree[2:]])) full_route = os.path.join(full_route, child_route) diff --git a/cowbird/handlers/impl/magpie.py b/cowbird/handlers/impl/magpie.py index be98bd2d..daa00c51 100644 --- a/cowbird/handlers/impl/magpie.py +++ b/cowbird/handlers/impl/magpie.py @@ -218,7 +218,7 @@ def get_geoserver_layer_res_id(self, workspace_name: str, layer_name: str, creat def get_user_list(self) -> List[str]: """ - Returns the list of all Magpie users. + Returns the list of all Magpie usernames. """ resp = self._send_request(method="GET", url=f"{self.url}/users", params={"detail": False}) if resp.status_code != 200: @@ -244,7 +244,7 @@ def get_user_name_from_user_id(self, user_id: int) -> str: for user_info in resp.json()["users"]: if "user_id" in user_info and user_info["user_id"] == user_id: return user_info["user_name"] - raise MagpieHttpError(f"Could not find any user with the id {user_id}.") + raise MagpieHttpError(f"Could not find any user with the id `{user_id}`.") def get_user_permissions(self, user: str) -> Dict[str, JSON]: """ @@ -270,7 +270,7 @@ def get_user_names_by_group_name(self, grp_name: str) -> List[str]: """ resp = self._send_request(method="GET", url=f"{self.url}/groups/{grp_name}/users") if resp.status_code != 200: - raise MagpieHttpError(f"Could not find the list of usernames from group {grp_name}. " + raise MagpieHttpError(f"Could not find the list of usernames from group `{grp_name}`. " f"HttpError {resp.status_code} : {resp.text}") return resp.json()["user_names"] diff --git a/docs/components.rst b/docs/components.rst index 6d7b610e..57be3c73 100644 --- a/docs/components.rst +++ b/docs/components.rst @@ -67,10 +67,10 @@ The user wps outputs data is made accessible by generating hardlinks from a wps data to a subdirectory found in the related user's workspace. For example, with a source path ``/wpsoutputs//users///``, a hardlink is generated at the path ``/user_workspaces//wpsoutputs///``. The hardlink path uses a similar -structure as found at the source path, but removes the redundant ``users`` and ```` path segments. The +structure as found in the source path, but removes the redundant ``users`` and ```` path segments. The hardlink files will be automatically available to the user on a `JupyterLab` instance since the workspace is mounted as a volume. Any file that is found under a directory ``/wpsoutputs//users//`` is considered to be -user data and any file outside is considered public. +user data and any outside file is considered public. The permissions found on the files are synchronized with the permissions found on `Magpie`. If `Magpie` uses a `secure-data-proxy` service, this service handles the permissions of those files. If a file does not have a diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 05bd0ad4..3c7ce8f7 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -68,7 +68,7 @@ def get_test_app(self, cfg_data: JSON) -> TestApp: @staticmethod def check_created_test_cases(output_path, hardlink_path): """ - Runs multiple test cases for the creation of hardlinks, which are the same for public and user files. + Runs multiple test cases, common to the public and user files, for the creation of hardlinks. """ # Make sure the hardlink doesn't already exist for the test cases Path(hardlink_path).unlink(missing_ok=True) From 1f319a879749abda46c943350c7e8750f2afc023 Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Tue, 8 Aug 2023 17:15:00 -0400 Subject: [PATCH 13/17] review filesystem tests --- cowbird/handlers/impl/filesystem.py | 3 +- tests/test_filesystem.py | 138 +++++++++++++++------------- 2 files changed, 75 insertions(+), 66 deletions(-) diff --git a/cowbird/handlers/impl/filesystem.py b/cowbird/handlers/impl/filesystem.py index a46ccf9a..4bb1591d 100644 --- a/cowbird/handlers/impl/filesystem.py +++ b/cowbird/handlers/impl/filesystem.py @@ -141,7 +141,6 @@ def get_user_hardlink(self, src_path: str, bird_name: str, user_name: str, subpa if not os.path.exists(user_workspace_dir): raise FileNotFoundError(f"User `{user_name}` workspace not found at path [{user_workspace_dir}]. Failed to " f"find a hardlink path for the wpsoutput [{src_path}] source path.") - # TODO: review subpath = os.path.join(bird_name, subpath) return os.path.join(self.get_wps_outputs_user_dir(user_name), subpath) @@ -189,7 +188,7 @@ def _get_secure_data_proxy_file_perms(self, src_path: str, user_name: str) -> Tu def update_secure_data_proxy_path_perms(self, src_path: str, user_name: str) -> bool: """ - Gets a path's permissions from the secure-data-proxy service and updates the file system permissions + Gets a path's permissions from the `secure-data-proxy` service and updates the file system permissions accordingly. Returns a boolean to indicate if the user should have some type of access to the path or not. diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 3c7ce8f7..97b1f639 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -17,7 +17,7 @@ from cowbird.api.schemas import ValidOperations from cowbird.handlers import HandlerFactory -from cowbird.handlers.impl.filesystem import NOTEBOOKS_DIR_NAME, SECURE_DATA_PROXY_NAME, USER_WPS_OUTPUTS_USER_DIR_NAME +from cowbird.handlers.impl.filesystem import NOTEBOOKS_DIR_NAME, SECURE_DATA_PROXY_NAME from cowbird.typedefs import JSON from tests import test_magpie, utils @@ -403,25 +403,24 @@ def setUp(self): self.job_id = 1 self.bird_name = "weaver" - self.output_filename = "test_output.txt" - subpath = f"{self.job_id}/{self.output_filename}" - self.output_file = os.path.join(self.wpsoutputs_dir, - f"{self.bird_name}/users/{self.user_id}/{subpath}") + self.test_filename = "test_output.txt" + subpath = f"{self.job_id}/{self.test_filename}" + self.test_file = os.path.join(self.wpsoutputs_dir, + f"{self.bird_name}/users/{self.user_id}/{subpath}") self.wps_outputs_user_dir = filesystem_handler.get_wps_outputs_user_dir(self.test_username) - self.hardlink_path = filesystem_handler.get_user_hardlink(src_path=self.output_file, + self.test_hardlink = filesystem_handler.get_user_hardlink(src_path=self.test_file, bird_name=self.bird_name, user_name=self.test_username, subpath=subpath) # Create the test wps output file - os.makedirs(os.path.dirname(self.output_file)) - with open(self.output_file, mode="w", encoding="utf-8"): + os.makedirs(os.path.dirname(self.test_file)) + with open(self.test_file, mode="w", encoding="utf-8"): pass - os.chmod(self.output_file, 0o666) + os.chmod(self.test_file, 0o666) - self.secure_data_proxy_name = SECURE_DATA_PROXY_NAME # Delete the service if it already exists - test_magpie.delete_service(magpie_handler, self.secure_data_proxy_name) + test_magpie.delete_service(magpie_handler, SECURE_DATA_PROXY_NAME) def create_secure_data_proxy_service(self): """ @@ -429,10 +428,10 @@ def create_secure_data_proxy_service(self): """ # Create service data = { - "service_name": self.secure_data_proxy_name, + "service_name": SECURE_DATA_PROXY_NAME, "service_type": ServiceAPI.service_type, "service_sync_type": ServiceAPI.service_type, - "service_url": f"http://localhost:9000/{self.secure_data_proxy_name}", + "service_url": f"http://localhost:9000/{SECURE_DATA_PROXY_NAME}", "configuration": {} } return test_magpie.create_service(HandlerFactory().get_handler("Magpie"), data) @@ -445,8 +444,10 @@ def check_path_perms_and_hardlink(src_path: str, hardlink_path: str, perms: int) utils.check_path_permissions(src_path, perms) if perms & 0o006: # check if path has a read or write permission set for `other` assert os.path.exists(hardlink_path) + assert os.stat(hardlink_path).st_nlink == 2 else: assert not os.path.exists(hardlink_path) + assert os.stat(src_path).st_nlink == 1 def test_user_wps_output_created(self): """ @@ -457,19 +458,23 @@ def test_user_wps_output_created(self): # Error expected if the user workspace does not exist shutil.rmtree(filesystem_handler.get_user_workspace_dir(self.test_username)) with pytest.raises(FileNotFoundError): - filesystem_handler.on_created(self.output_file) + filesystem_handler.on_created(self.test_file) # Create the user workspace filesystem_handler.user_created(self.test_username) - TestFileSystem.check_created_test_cases(self.output_file, self.hardlink_path) + TestFileSystem.check_created_test_cases(self.test_file, self.test_hardlink) # Check that the hardlink's parent directory has the right permissions - utils.check_path_permissions(os.path.dirname(self.hardlink_path), 0o777) + utils.check_path_permissions(os.path.dirname(self.test_hardlink), 0o777) # A create event on a folder should not be processed (no corresponding target folder created) - src_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{self.job_id}") - target_dir = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.job_id}") + subpath = str(self.job_id) + src_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{subpath}") + target_dir = filesystem_handler.get_user_hardlink(src_path=src_dir, + bird_name=self.bird_name, + user_name=self.test_username, + subpath=subpath) shutil.rmtree(target_dir) filesystem_handler.on_created(src_dir) assert not os.path.exists(target_dir) @@ -483,7 +488,7 @@ def test_user_created(self): # Simulate a user_created event and check that the expected hardlink is generated. filesystem_handler.user_created(self.test_username) - assert os.stat(self.hardlink_path).st_nlink == 2 + assert os.stat(self.test_hardlink).st_nlink == 2 def test_user_wps_output_created_secure_data_proxy(self): """ @@ -495,28 +500,28 @@ def test_user_wps_output_created_secure_data_proxy(self): svc_id = self.create_secure_data_proxy_service() # Note that the following test cases are made to be executed in a specific order and are not interchangeable. + # Each permission case specifies the permission's name and access to add and the resulting expected file perms. test_cases = [{ - # If secure-data-proxy service exists but no route is defined for wpsoutputs, - # assume access is not allowed and check if no hardlink is created. + # If secure-data-proxy service exists but no route is defined for wpsoutputs, assume access is not allowed. "routes_to_create": [], - "permissions_cases": [("", "", False, 0o660)] + "permissions_cases": [("", "", 0o660)] }, { # Permission applied only on a parent resource # If the route is only defined on a parent resource and no route are defined for the actual file, - # assume access is the same as the access of the parent, and hardlink should be created accordingly. + # assume access is the same as the access of the parent. "routes_to_create": ["wpsoutputs"], - "permissions_cases": [(Permission.READ.value, Access.DENY.value, False, 0o660), - (Permission.READ.value, Access.ALLOW.value, True, 0o664), - (Permission.WRITE.value, Access.ALLOW.value, True, 0o666), - (Permission.WRITE.value, Access.DENY.value, True, 0o664)] + "permissions_cases": [(Permission.READ.value, Access.DENY.value, 0o660), + (Permission.READ.value, Access.ALLOW.value, 0o664), + (Permission.WRITE.value, Access.ALLOW.value, 0o666), + (Permission.WRITE.value, Access.DENY.value, 0o664)] }, { # Permission applied on the actual resource - Test access with an exact route match # Create the rest of the route to get a route that match the actual full path of the resource - "routes_to_create": re.sub(rf"^{self.wpsoutputs_dir}", "", self.output_file).strip("/").split("/"), - "permissions_cases": [(Permission.READ.value, Access.DENY.value, False, 0o660), - (Permission.READ.value, Access.ALLOW.value, True, 0o664), - (Permission.WRITE.value, Access.ALLOW.value, True, 0o666), - (Permission.WRITE.value, Access.DENY.value, True, 0o664)]}] + "routes_to_create": re.sub(rf"^{self.wpsoutputs_dir}", "", self.test_file).strip("/").split("/"), + "permissions_cases": [(Permission.READ.value, Access.DENY.value, 0o660), + (Permission.READ.value, Access.ALLOW.value, 0o664), + (Permission.WRITE.value, Access.ALLOW.value, 0o666), + (Permission.WRITE.value, Access.DENY.value, 0o664)]}] # Resource id of the last existing route resource found from the path of the test file last_res_id = svc_id @@ -524,7 +529,7 @@ def test_user_wps_output_created_secure_data_proxy(self): # Create routes found in list for route in test_case["routes_to_create"]: last_res_id = magpie_handler.create_resource(route, Route.resource_type_name, last_res_id) - for perm_name, perm_access, expecting_created_file, expected_file_perms in test_case["permissions_cases"]: + for perm_name, perm_access, expected_file_perms in test_case["permissions_cases"]: # Reset hardlink file for each test shutil.rmtree(self.wps_outputs_user_dir, ignore_errors=True) @@ -538,9 +543,8 @@ def test_user_wps_output_created_secure_data_proxy(self): perm_scope=Scope.MATCH.value) # Check if file is created according to permissions - filesystem_handler.on_created(self.output_file) - assert expecting_created_file == os.path.exists(self.hardlink_path) - utils.check_path_permissions(self.output_file, expected_file_perms) + filesystem_handler.on_created(self.test_file) + self.check_path_perms_and_hardlink(self.test_file, self.test_hardlink, expected_file_perms) def test_user_wps_output_deleted(self): """ @@ -550,10 +554,10 @@ def test_user_wps_output_deleted(self): # Basic test cases for deleting user wps outputs. More extensive delete test cases are done in the public tests. # Test deleting a user file. - filesystem_handler.on_created(self.output_file) - assert os.path.exists(self.hardlink_path) - filesystem_handler.on_deleted(self.output_file) - assert not os.path.exists(self.hardlink_path) + filesystem_handler.on_created(self.test_file) + assert os.path.exists(self.test_hardlink) + filesystem_handler.on_deleted(self.test_file) + assert not os.path.exists(self.test_hardlink) # Test deleting a user directory src_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{self.job_id}") @@ -566,7 +570,7 @@ def test_resync(self): """ Tests resync operation on wpsoutputs user data. """ - + filesystem_handler = HandlerFactory().get_handler("FileSystem") test_dir = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/{self.job_id}") # Create a file in a subfolder of the linked folder that should be removed by the resync @@ -580,20 +584,25 @@ def test_resync(self): os.mkdir(old_subdir) # Create a new empty dir (should not appear in the resynced wpsoutputs since only files are processed) - new_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/new_dir") + subpath = "new_dir" + new_dir = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{subpath}") os.mkdir(new_dir) - new_dir_linked_path = os.path.join(self.wps_outputs_user_dir, f"{self.bird_name}/new_dir") + + new_dir_linked_path = filesystem_handler.get_user_hardlink(src_path=new_dir, + bird_name=self.bird_name, + user_name=self.test_username, + subpath=subpath) # Check that old files exist before applying the resync - assert not os.path.exists(self.hardlink_path) + assert not os.path.exists(self.test_hardlink) assert os.path.exists(old_nested_file) assert os.path.exists(old_subdir) resp = utils.test_request(self.app, "PUT", "/handlers/FileSystem/resync") - # Check that new hardlinks are generated and old files are removed + # Check that hardlink is generated and old files are removed assert resp.status_code == 200 - assert os.stat(self.hardlink_path).st_nlink == 2 + assert os.stat(self.test_hardlink).st_nlink == 2 assert not os.path.exists(new_dir_linked_path) assert not os.path.exists(old_nested_file) assert not os.path.exists(old_subdir) @@ -607,8 +616,8 @@ def test_permission_updates_user_data(self): svc_id = self.create_secure_data_proxy_service() # Prepare test files - subdir_test_file = self.output_file - subdir_hardlink = self.hardlink_path + subdir_test_file = self.test_file + subdir_hardlink = self.test_hardlink root_test_filename = "other_file.txt" root_test_file = os.path.join(self.wpsoutputs_dir, f"{self.bird_name}/users/{self.user_id}/{root_test_filename}") @@ -621,7 +630,7 @@ def test_permission_updates_user_data(self): # Prepare test routes user_dir_res_id = None - routes = re.sub(rf"^{self.wpsoutputs_dir}", "wpsoutputs", self.output_file).strip("/") + routes = re.sub(rf"^{self.wpsoutputs_dir}", "wpsoutputs", self.test_file).strip("/") last_res_id = svc_id for route in routes.split("/"): last_res_id = magpie_handler.create_resource(route, Route.resource_type_name, last_res_id) @@ -629,7 +638,9 @@ def test_permission_updates_user_data(self): user_dir_res_id = last_res_id if not user_dir_res_id: raise ValueError("Missing resource id for the user directory, invalid test.") + # subdir file resource subdir_file_res_id = last_res_id + # root file resource magpie_handler.create_resource(root_test_filename, Route.resource_type_name, user_dir_res_id) data = { @@ -698,8 +709,10 @@ def test_permission_updates_wpsoutputs_data(self): """ filesystem_handler = HandlerFactory().get_handler("FileSystem") magpie_handler = HandlerFactory().get_handler("Magpie") - # Create resources for the full route + + # Create resources svc_id = self.create_secure_data_proxy_service() + res_id = magpie_handler.create_resource("wpsoutputs", Route.resource_type_name, svc_id) # Create other user from a group different than the test group test_magpie.delete_group(magpie_handler, "others") @@ -737,15 +750,12 @@ def test_permission_updates_wpsoutputs_data(self): user_name=same_group_username, subpath=same_group_filename) - for file in [self.output_file, public_file, public_subfile, ignored_file, same_group_file]: + for file in [self.test_file, public_file, public_subfile, ignored_file, same_group_file]: os.makedirs(os.path.dirname(file), exist_ok=True) with open(file, mode="w", encoding="utf-8"): pass os.chmod(file, 0o660) - # Create resource - res_id = magpie_handler.create_resource("wpsoutputs", Route.resource_type_name, svc_id) - data = { "event": ValidOperations.CreateOperation.value, "service_name": None, @@ -760,10 +770,10 @@ def test_permission_updates_wpsoutputs_data(self): } magpie_handler.create_permission_by_res_id(data["resource_id"], data["name"], data["access"], data["scope"], user_name=data["user"]) - # Check that perms was only updated for concerned user files + # Check that perms are only updated for concerned user files resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) assert resp.status_code == 200 - self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o664) + self.check_path_perms_and_hardlink(self.test_file, self.test_hardlink, 0o664) self.check_path_perms_and_hardlink(ignored_file, ignored_hardlink, 0o660) self.check_path_perms_and_hardlink(same_group_file, same_group_hardlink, 0o660) utils.check_path_permissions(public_file, 0o660) @@ -777,7 +787,7 @@ def test_permission_updates_wpsoutputs_data(self): grp_name=data["group"]) resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) assert resp.status_code == 200 - self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o666) + self.check_path_perms_and_hardlink(self.test_file, self.test_hardlink, 0o666) self.check_path_perms_and_hardlink(ignored_file, ignored_hardlink, 0o660) self.check_path_perms_and_hardlink(same_group_file, same_group_hardlink, 0o662) utils.check_path_permissions(public_file, 0o660) @@ -789,16 +799,16 @@ def test_permission_updates_wpsoutputs_data(self): magpie_handler.create_permission_by_res_id(data["resource_id"], data["name"], data["access"], data["scope"], grp_name=data["group"]) utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) - self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o666) + self.check_path_perms_and_hardlink(self.test_file, self.test_hardlink, 0o666) - # Test deleting a specific user permission + # Test deleting a specific user permission, removing read-allow on user data["event"] = ValidOperations.DeleteOperation.value data["user"] = self.test_username data["group"] = None magpie_handler.delete_permission_by_user_and_res_id(data["user"], data["resource_id"], data["name"]) resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) assert resp.status_code == 200 - self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o662) + self.check_path_perms_and_hardlink(self.test_file, self.test_hardlink, 0o662) self.check_path_perms_and_hardlink(ignored_file, ignored_hardlink, 0o660) self.check_path_perms_and_hardlink(same_group_file, same_group_hardlink, 0o662) utils.check_path_permissions(public_file, 0o660) @@ -811,7 +821,7 @@ def test_permission_updates_wpsoutputs_data(self): magpie_handler.delete_permission_by_grp_and_res_id(data["group"], data["resource_id"], data["name"]) resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) assert resp.status_code == 200 - self.check_path_perms_and_hardlink(self.output_file, self.hardlink_path, 0o660) + self.check_path_perms_and_hardlink(self.test_file, self.test_hardlink, 0o660) self.check_path_perms_and_hardlink(ignored_file, ignored_hardlink, 0o660) self.check_path_perms_and_hardlink(same_group_file, same_group_hardlink, 0o660) utils.check_path_permissions(public_file, 0o660) @@ -837,7 +847,7 @@ def test_permission_updates_other_svc(self): # Create resource associated with the test file, on the other service last_res_id = other_svc_id - routes = re.sub(rf"^{self.wpsoutputs_dir}", "", self.output_file).strip("/") + routes = re.sub(rf"^{self.wpsoutputs_dir}", "wpsoutputs", self.test_file).strip("/") for route in routes.split("/"): last_res_id = magpie_handler.create_resource(route, Route.resource_type_name, last_res_id) @@ -856,7 +866,7 @@ def test_permission_updates_other_svc(self): # Check that a delete event on the resource of the other service does not modify the file permissions. resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) assert resp.status_code == 200 - utils.check_path_permissions(self.output_file, 0o666) + utils.check_path_permissions(self.test_file, 0o666) # Check that a create event on the resource of the other service does not modify the file permissions. data["event"] = ValidOperations.CreateOperation.value @@ -865,4 +875,4 @@ def test_permission_updates_other_svc(self): data["scope"], data["user"]) resp = utils.test_request(self.app, "POST", "/webhooks/permissions", json=data) assert resp.status_code == 200 - utils.check_path_permissions(self.output_file, 0o666) + utils.check_path_permissions(self.test_file, 0o666) From 9aa981bef9eb03c6f6c4b763d376168e285add9c Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Wed, 9 Aug 2023 07:56:25 -0400 Subject: [PATCH 14/17] adjust doc on user wpsoutputs --- docs/components.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/components.rst b/docs/components.rst index 57be3c73..06f6fbaa 100644 --- a/docs/components.rst +++ b/docs/components.rst @@ -74,9 +74,12 @@ user data and any outside file is considered public. The permissions found on the files are synchronized with the permissions found on `Magpie`. If `Magpie` uses a `secure-data-proxy` service, this service handles the permissions of those files. If a file does not have a -corresponding route on the `secure-data-proxy` service, it will use the closest parent permissions. If no such service -is found, the user files are assumed to be fully available with read and write permissions for the user. Note that if -the file does not have any read or write permissions, the hardlink will not be available in the user's workspace. +corresponding route on the `secure-data-proxy` service, it will use the closest parent permissions. Note that the route +resources found under the `secure-data-proxy` service must match exactly a path on the filesystem, starting with the +directory name ``wpsoutputs``, and following with the desired children directories/file names. If no `secure-data-proxy` +service is found, the user files are assumed to be fully available with read and write permissions for the user. Note +that if the file does not have any read or write permissions, the hardlink will not be available in the user's +workspace. Note that different design choices were made to respect the constraints of the file system and to prevent the user from accessing forbidden data: From 69047fb885137e0e1cab734bb4af070ecccbfdfa Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Wed, 9 Aug 2023 08:36:26 -0400 Subject: [PATCH 15/17] add logging for test util function to check perms --- tests/utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index 603e6297..f56e48a8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -28,6 +28,7 @@ NullType, SingletonMeta, get_header, + get_logger, get_settings_from_config_ini, is_null, null @@ -37,6 +38,7 @@ TEST_INI_FILE = os.path.join(COWBIRD_ROOT, "config/cowbird.example.ini") TEST_CFG_FILE = os.path.join(COWBIRD_ROOT, "config/config.example.yml") +LOGGER = get_logger(__name__) class TestAppContainer(object): test_app: Optional[TestApp] = None @@ -713,7 +715,13 @@ def check_path_permissions(path: Union[str, os.PathLike], permissions: int) -> N """ Checks if the path has the right permissions, by verifying the last digits of the octal permissions. """ - assert oct(os.stat(path)[ST_MODE] & 0o777) == oct(permissions & 0o777) + expected_perms = oct(permissions & 0o777) + actual_perms = oct(os.stat(path)[ST_MODE] & 0o777) + try: + assert actual_perms == expected_perms + except AssertionError as err: + LOGGER.error("Actual permissions `%s` not equal to expected permissions `%s`.", actual_perms, expected_perms) + raise err def check_mock_has_calls(mocked_fct, calls): From c696babdbcace15fa9ad2becb55cedee2b78f566 Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Wed, 9 Aug 2023 08:40:50 -0400 Subject: [PATCH 16/17] fix lint --- tests/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/utils.py b/tests/utils.py index f56e48a8..6bbed766 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -40,6 +40,7 @@ LOGGER = get_logger(__name__) + class TestAppContainer(object): test_app: Optional[TestApp] = None app: Optional[TestApp] = None From a22e5d1967a476277bd15a42c06f0ee8ea3dac6c Mon Sep 17 00:00:00 2001 From: Charles-William Cummings Date: Wed, 9 Aug 2023 08:51:16 -0400 Subject: [PATCH 17/17] check only others permissions for directories during tests --- tests/test_filesystem.py | 4 ++-- tests/utils.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 97b1f639..dda1395f 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -231,7 +231,7 @@ def test_public_wps_output_created(self): TestFileSystem.check_created_test_cases(output_file, hardlink_path) # Check that the hardlink's parent directory have the right permissions - utils.check_path_permissions(os.path.dirname(hardlink_path), 0o775) + utils.check_path_permissions(os.path.dirname(hardlink_path), 0o005, check_others_only=True) # A create event on a folder should not be processed (no corresponding target folder created) target_dir = os.path.join(filesystem_handler.get_wps_outputs_public_dir(), bird_name) @@ -466,7 +466,7 @@ def test_user_wps_output_created(self): TestFileSystem.check_created_test_cases(self.test_file, self.test_hardlink) # Check that the hardlink's parent directory has the right permissions - utils.check_path_permissions(os.path.dirname(self.test_hardlink), 0o777) + utils.check_path_permissions(os.path.dirname(self.test_hardlink), 0o007, check_others_only=True) # A create event on a folder should not be processed (no corresponding target folder created) subpath = str(self.job_id) diff --git a/tests/utils.py b/tests/utils.py index 6bbed766..599e188e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -712,12 +712,15 @@ def check_error_param_structure(body: JSON, check_val_equal(body["param"]["compare"], param_compare) -def check_path_permissions(path: Union[str, os.PathLike], permissions: int) -> None: +def check_path_permissions(path: Union[str, os.PathLike], permissions: int, check_others_only: bool = False) -> None: """ Checks if the path has the right permissions, by verifying the last digits of the octal permissions. """ - expected_perms = oct(permissions & 0o777) - actual_perms = oct(os.stat(path)[ST_MODE] & 0o777) + check_mask = 0o777 + if check_others_only: + check_mask = 0o007 + expected_perms = oct(permissions & check_mask) + actual_perms = oct(os.stat(path)[ST_MODE] & check_mask) try: assert actual_perms == expected_perms except AssertionError as err: