diff --git a/html/_modules/index.html b/html/_modules/index.html
index 99168c4c..f62315ab 100644
--- a/html/_modules/index.html
+++ b/html/_modules/index.html
@@ -114,7 +114,10 @@

All modules for which code is available

   • lomas_server.private_dataset.private_dataset
   • lomas_server.private_dataset.s3_dataset
   • lomas_server.private_dataset.utils
+  • lomas_server.tests.test_api
   • lomas_server.tests.test_dummy_generation
+  • lomas_server.tests.test_mongodb_admin
+  • lomas_server.tests.test_mongodb_admin_cli
   • lomas_server.utils.anti_timing_att
   • lomas_server.utils.collections_models
   • lomas_server.utils.config

diff --git a/html/_modules/lomas_server/tests/test_api.html b/html/_modules/lomas_server/tests/test_api.html
new file mode 100644
index 00000000..8ceff16c
--- /dev/null
+++ b/html/_modules/lomas_server/tests/test_api.html
@@ -0,0 +1,1051 @@
+ lomas_server.tests.test_api — Lomas 0.0.1 documentation

    Source code for lomas_server.tests.test_api

    +import json
    +import os
    +import unittest
    +from io import StringIO
    +
    +from fastapi import status
    +from fastapi.testclient import TestClient
    +from opendp.mod import enable_features
    +import opendp.prelude as dp_p
    +from opendp_logger import enable_logging
    +import pandas as pd
    +from pymongo.database import Database
    +
    +from admin_database.utils import get_mongodb, database_factory
    +from mongodb_admin import (
    +    add_datasets_via_yaml,
    +    add_users_via_yaml,
    +    drop_collection,
    +)
    +from app import app
    +from constants import DatasetStoreType, EPSILON_LIMIT, DPLibraries
    +from tests.constants import (
    +    ENV_MONGO_INTEGRATION,
    +    ENV_S3_INTEGRATION,
    +    TRUE_VALUES,
    +)
    +from utils.config import CONFIG_LOADER
    +from utils.error_handler import InternalServerException
    +from utils.example_inputs import (
    +    DUMMY_NB_ROWS,
    +    PENGUIN_DATASET,
    +    SMARTNOISE_QUERY_DELTA,
    +    SMARTNOISE_QUERY_EPSILON,
    +    example_dummy_opendp,
    +    example_dummy_smartnoise_sql,
    +    example_get_admin_db_data,
    +    example_get_dummy_dataset,
    +    example_opendp,
    +    example_smartnoise_sql,
    +    example_smartnoise_sql_cost,
    +)
    +
    +INITAL_EPSILON = 10
    +INITIAL_DELTA = 0.005
    +
    +enable_features("floating-point")
    +
    +
    +
+[docs]
+class TestRootAPIEndpoint(unittest.TestCase):  # pylint: disable=R0904
+    """
+    End-to-end tests of the API endpoints.
+
+    These tests can be executed either as integration tests
+    (enabled by setting LOMAS_TEST_MONGO_INTEGRATION to True)
+    or as standard tests. The former requires a MongoDB instance
+    to be started beforehand, while the latter uses a local YamlDatabase.
+    """
    +[docs] + @classmethod + def setUpClass(cls) -> None: + # Read correct config depending on the database we test against + if os.getenv(ENV_MONGO_INTEGRATION, "0").lower() in TRUE_VALUES: + CONFIG_LOADER.load_config( + config_path="tests/test_configs/test_config_mongo.yaml", + secrets_path="tests/test_configs/test_secrets.yaml", + ) + else: + CONFIG_LOADER.load_config( + config_path="tests/test_configs/test_config.yaml", + secrets_path="tests/test_configs/test_secrets.yaml", + )
    + + +
    +[docs] + @classmethod + def tearDownClass(cls) -> None: + pass
    + + +
+[docs]
+    def setUp(self) -> None:
+        """Set up the test user, request headers and admin database."""
+        self.user_name = "Dr. Antartica"
+        self.dataset = PENGUIN_DATASET
+        self.headers = {
+            "Content-type": "application/json",
+            "Accept": "*/*",
+        }
+        self.headers["user-name"] = self.user_name
+
+        # Fill up database if needed
+        if os.getenv(ENV_MONGO_INTEGRATION, "0").lower() in TRUE_VALUES:
+            self.db: Database = get_mongodb()
+
+            add_users_via_yaml(
+                self.db,
+                yaml_file="tests/test_data/test_user_collection.yaml",
+                clean=True,
+                overwrite=True,
+            )
+
+            if os.getenv(ENV_S3_INTEGRATION, "0").lower() in TRUE_VALUES:
+                yaml_file = "tests/test_data/test_datasets_with_s3.yaml"
+            else:
+                yaml_file = "tests/test_data/test_datasets.yaml"
+
+            add_datasets_via_yaml(
+                self.db,
+                yaml_file=yaml_file,
+                clean=True,
+                overwrite_datasets=True,
+                overwrite_metadata=True,
+            )
    + + +
    +[docs] + def tearDown(self) -> None: + # Clean up database if needed + if os.getenv(ENV_MONGO_INTEGRATION, "0").lower() in TRUE_VALUES: + drop_collection(self.db, "metadata") + drop_collection(self.db, "datasets") + drop_collection(self.db, "users") + drop_collection(self.db, "queries_archives")
    + + +
    +[docs] + def test_config_and_internal_server_exception(self) -> None: + """Test set wrong configuration""" + config = CONFIG_LOADER.get_config() + + # Put unknown admin database + previous_admin_db = config.admin_database.db_type + config.admin_database.db_type = "wrong_db" + with self.assertRaises(InternalServerException) as context: + database_factory(config.admin_database) + self.assertEqual( + str(context.exception), "Database type wrong_db not supported." + ) + # Put original state back + config.admin_database.db_type = previous_admin_db
    + + +
    +[docs] + def test_root(self) -> None: + """Test root endpoint redirection to state endpoint""" + with TestClient(app, headers=self.headers) as client: + response_root = client.get("/", headers=self.headers) + response_state = client.get("/state", headers=self.headers) + assert response_root.status_code == response_state.status_code + assert json.loads( + response_root.content.decode("utf8") + ) == json.loads(response_state.content.decode("utf8"))
    + + +
    +[docs] + def test_state(self) -> None: + """Test state endpoint""" + with TestClient(app, headers=self.headers) as client: + response = client.get("/state", headers=self.headers) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + assert response_dict["state"]["LIVE"]
    + + +
    +[docs] + def test_memory_usage(self) -> None: + """Test memory usage endpoint""" + with TestClient(app, headers=self.headers) as client: + config = CONFIG_LOADER.get_config() + if config.dataset_store.ds_store_type == DatasetStoreType.LRU: + # Test before adding data + response = client.get( + "/get_memory_usage", headers=self.headers + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["memory_usage"] == 0 + + # Test after adding data + response = client.post( + "/smartnoise_query", + json=example_smartnoise_sql, + headers=self.headers, + ) + assert response.status_code == status.HTTP_200_OK + + response = client.get( + "/get_memory_usage", headers=self.headers + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["memory_usage"] > 0
    + + +
+[docs]
+    def test_get_dataset_metadata(self) -> None:
+        """test_get_dataset_metadata"""
+        with TestClient(app) as client:
+            # Expect to work
+            response = client.post(
+                "/get_dataset_metadata",
+                json=example_get_admin_db_data,
+                headers=self.headers,
+            )
+            assert response.status_code == status.HTTP_200_OK
+
+            metadata = json.loads(response.content.decode("utf8"))
+            assert isinstance(metadata, dict), "metadata should be a dict"
+            assert "max_ids" in metadata, "max_ids should be in metadata"
+            assert "row_privacy" in metadata, "row_privacy should be in metadata"
+            assert "columns" in metadata, "columns should be in metadata"
+
+            # Expect to fail: dataset does not exist
+            fake_dataset = "I_do_not_exist"
+            response = client.post(
+                "/get_dataset_metadata",
+                json={"dataset_name": fake_dataset},
+                headers=self.headers,
+            )
+            assert response.status_code == status.HTTP_400_BAD_REQUEST
+            assert response.json() == {
+                "InvalidQueryException": f"Dataset {fake_dataset} does not "
+                + "exists. Please, verify the client object initialisation."
+            }
    + + +
    +[docs] + def test_get_dummy_dataset(self) -> None: + """test_get_dummy_dataset""" + with TestClient(app) as client: + # Expect to work + response = client.post( + "/get_dummy_dataset", json=example_get_dummy_dataset + ) + assert response.status_code == status.HTTP_200_OK + + data = response.content.decode("utf8") + df = pd.read_csv(StringIO(data)) + assert isinstance( + df, pd.DataFrame + ), "Response should be a pd.DataFrame" + assert ( + df.shape[0] == DUMMY_NB_ROWS + ), "Dummy pd.DataFrame does not have expected number of rows" + + # Expect to fail: dataset does not exist + fake_dataset = "I_do_not_exist" + response = client.post( + "/get_dummy_dataset", + json={ + "dataset_name": fake_dataset, + "dummy_nb_rows": DUMMY_NB_ROWS, + "dummy_seed": 0, + }, + headers=self.headers, + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.json() == { + "InvalidQueryException": f"Dataset {fake_dataset} does not " + + "exists. Please, verify the client object initialisation." + } + + # Expect to fail: missing argument dummy_nb_rows + response = client.post( + "/get_dummy_dataset", + json={ + "dataset_name": PENGUIN_DATASET, + }, + headers=self.headers, + ) + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
    + + +
    +[docs] + def test_smartnoise_query(self) -> None: + """Test smartnoise-sql query""" + with TestClient(app, headers=self.headers) as client: + # Expect to work + response = client.post( + "/smartnoise_query", + json=example_smartnoise_sql, + headers=self.headers, + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + assert response_dict["query_response"]["columns"] == ["NB_ROW"] + assert response_dict["query_response"]["data"][0][0] > 0 + assert response_dict["spent_epsilon"] == SMARTNOISE_QUERY_EPSILON + assert response_dict["spent_delta"] >= SMARTNOISE_QUERY_DELTA + + # Expect to fail: missing parameters: delta and mechanisms + response = client.post( + "/smartnoise_query", + json={ + "query_str": "SELECT COUNT(*) AS NB_ROW FROM df", + "dataset_name": PENGUIN_DATASET, + "epsilon": SMARTNOISE_QUERY_EPSILON, + "postprocess": True, + }, + headers=self.headers, + ) + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + response_dict = json.loads(response.content.decode("utf8"))[ + "detail" + ] + assert response_dict[0]["type"] == "missing" + assert response_dict[0]["loc"] == ["body", "delta"] + assert response_dict[1]["type"] == "missing" + assert response_dict[1]["loc"] == ["body", "mechanisms"] + + # Expect to fail: not enough budget + input_smartnoise = dict(example_smartnoise_sql) + input_smartnoise["epsilon"] = 0.000000001 + response = client.post( + "/smartnoise_query", + json=input_smartnoise, + headers=self.headers, + ) + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert response.json() == { + "ExternalLibraryException": "Error obtaining cost: " + + "Noise scale is too large using epsilon=1e-09 " + + "and bounds (0, 1) with Mechanism.gaussian. " + + "Try preprocessing to reduce senstivity, " + + "or try different privacy parameters.", + "library": "smartnoise_sql", + } + + # Expect to fail: query does not make sense + input_smartnoise = dict(example_smartnoise_sql) + input_smartnoise["query_str"] = ( + "SELECT AVG(bill) FROM df" # no 'bill' column + ) + response = client.post( + "/smartnoise_query", + json=input_smartnoise, + headers=self.headers, + ) + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert response.json() == { + "ExternalLibraryException": "Error obtaining cost: " + + "Column cannot be found bill", + "library": "smartnoise_sql", + } + + # Expect to fail: dataset without access + input_smartnoise = dict(example_smartnoise_sql) + input_smartnoise["dataset_name"] = "IRIS" + response = client.post( + "/smartnoise_query", + json=input_smartnoise, + headers=self.headers, + ) + assert response.status_code == status.HTTP_403_FORBIDDEN + assert response.json() == { + "UnauthorizedAccessException": "" + + "Dr. Antartica does not have access to IRIS." + } + + # Expect to fail: dataset does not exist + input_smartnoise = dict(example_smartnoise_sql) + input_smartnoise["dataset_name"] = "I_do_not_exist" + response = client.post( + "/smartnoise_query", + json=input_smartnoise, + headers=self.headers, + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.json() == { + "InvalidQueryException": "" + + "Dataset I_do_not_exist does not exists. " + + "Please, verify the client object initialisation." 
+ } + + # Expect to fail: user does not exist + new_headers = self.headers + new_headers["user-name"] = "I_do_not_exist" + response = client.post( + "/smartnoise_query", + json=example_smartnoise_sql, + headers=new_headers, + ) + assert response.status_code == status.HTTP_403_FORBIDDEN + assert response.json() == { + "UnauthorizedAccessException": "" + + "User I_do_not_exist does not exist. " + + "Please, verify the client object initialisation." + }
    + + +
    +[docs] + def test_smartnoise_query_on_s3_dataset(self) -> None: + """Test smartnoise-sql on s3 dataset""" + if os.getenv(ENV_S3_INTEGRATION, "0").lower() in TRUE_VALUES: + with TestClient(app, headers=self.headers) as client: + # Expect to work + input_smartnoise = dict(example_smartnoise_sql) + input_smartnoise["dataset_name"] = "TINTIN_S3_TEST" + response = client.post( + "/smartnoise_query", + json=input_smartnoise, + headers=self.headers, + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + assert response_dict["query_response"]["columns"] == ["NB_ROW"] + assert ( + response_dict["spent_epsilon"] == SMARTNOISE_QUERY_EPSILON + ) + assert response_dict["spent_delta"] >= SMARTNOISE_QUERY_DELTA
    + + +
    +[docs] + def test_dummy_smartnoise_query(self) -> None: + """test_dummy_smartnoise_query""" + with TestClient(app) as client: + # Expect to work + response = client.post( + "/dummy_smartnoise_query", json=example_dummy_smartnoise_sql + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["query_response"]["columns"] == ["res_0"] + assert response_dict["query_response"]["data"][0][0] > 0 + assert response_dict["query_response"]["data"][0][0] < 200
    + + +
    +[docs] + def test_smartnoise_cost(self) -> None: + """test_smartnoise_cost""" + with TestClient(app) as client: + # Expect to work + response = client.post( + "/estimate_smartnoise_cost", json=example_smartnoise_sql_cost + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["epsilon_cost"] == SMARTNOISE_QUERY_EPSILON + assert response_dict["delta_cost"] > SMARTNOISE_QUERY_DELTA
    + + +
    +[docs] + def test_opendp_query(self) -> None: # pylint: disable=R0915 + """test_opendp_query""" + enable_logging() + + with TestClient(app, headers=self.headers) as client: + # Basic test based on example with max divergence (Pure DP) + response = client.post( + "/opendp_query", + json=example_opendp, + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + assert response_dict["query_response"] > 0 + assert response_dict["spent_epsilon"] > 0.1 + assert response_dict["spent_delta"] == 0 + + # Tests on different pipeline + colnames = [ + "species", + "island", + "bill_length_mm", + "bill_depth_mm", + "flipper_length_mm", + "body_mass_g", + "sex", + ] + transformation_pipeline = ( + dp_p.t.make_split_dataframe(separator=",", col_names=colnames) + >> dp_p.t.make_select_column(key="bill_length_mm", TOA=str) + >> dp_p.t.then_cast_default(TOA=float) + >> dp_p.t.then_clamp(bounds=(30.0, 65.0)) + >> dp_p.t.then_resize(size=346, constant=43.61) + >> dp_p.t.then_variance() + ) + + # Expect to fail: transormation instead of measurement + response = client.post( + "/opendp_query", + json={ + "dataset_name": PENGUIN_DATASET, + "opendp_json": transformation_pipeline.to_json(), + }, + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.json() == { + "InvalidQueryException": "The pipeline provided is not a " + + "measurement. It cannot be processed in this server." + } + + # Test MAX_DIVERGENCE (pure DP) + md_pipeline = transformation_pipeline >> dp_p.m.then_laplace( + scale=5.0 + ) + response = client.post( + "/opendp_query", + json={ + "dataset_name": PENGUIN_DATASET, + "opendp_json": md_pipeline.to_json(), + }, + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + assert response_dict["query_response"] > 0 + assert response_dict["spent_epsilon"] > 0.1 + assert response_dict["spent_delta"] == 0 + + # Test ZERO_CONCENTRATED_DIVERGENCE + zcd_pipeline = transformation_pipeline >> dp_p.m.then_gaussian( + scale=5.0 + ) + json_obj = { + "dataset_name": PENGUIN_DATASET, + "opendp_json": zcd_pipeline.to_json(), + } + # Should error because missing fixed_delta + response = client.post("/opendp_query", json=json_obj) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.json() == { + "InvalidQueryException": "" + + "fixed_delta must be set for smooth max divergence" + + " and zero concentrated divergence." 
+ } + # Should work because fixed_delta is set + json_obj["fixed_delta"] = 1e-6 + response = client.post("/opendp_query", json=json_obj) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + assert response_dict["query_response"] > 0 + assert response_dict["spent_epsilon"] > 0.1 + assert response_dict["spent_delta"] == 1e-6 + + # Test SMOOTHED_MAX_DIVERGENCE (approx DP) + sm_pipeline = dp_p.c.make_zCDP_to_approxDP(zcd_pipeline) + json_obj = { + "dataset_name": PENGUIN_DATASET, + "opendp_json": sm_pipeline.to_json(), + } + # Should error because missing fixed_delta + response = client.post("/opendp_query", json=json_obj) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.json() == { + "InvalidQueryException": "" + + "fixed_delta must be set for smooth max divergence" + + " and zero concentrated divergence." + } + + # Should work because fixed_delta is set + json_obj["fixed_delta"] = 1e-6 + response = client.post("/opendp_query", json=json_obj) + assert response.status_code == status.HTTP_200_OK + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + assert response_dict["query_response"] > 0 + assert response_dict["spent_epsilon"] > 0.1 + assert response_dict["spent_delta"] == 1e-6 + + # Test FIXED_SMOOTHED_MAX_DIVERGENCE + fms_pipeline = ( + dp_p.t.make_split_dataframe(separator=",", col_names=colnames) + >> dp_p.t.make_select_column(key="island", TOA=str) + >> dp_p.t.then_count_by(MO=dp_p.L1Distance[float], TV=float) + >> dp_p.m.then_base_laplace_threshold( + scale=2.0, threshold=28.0 + ) + ) + json_obj = { + "dataset_name": PENGUIN_DATASET, + "opendp_json": fms_pipeline.to_json(), + } + # Should error because missing fixed_delta + response = client.post("/opendp_query", json=json_obj) + assert response.status_code == status.HTTP_200_OK + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + assert isinstance(response_dict["query_response"], dict) + assert response_dict["spent_epsilon"] > 0.1 + assert response_dict["spent_delta"] > 0
    + + +
    +[docs] + def test_dummy_opendp_query(self) -> None: + """test_dummy_opendp_query""" + with TestClient(app) as client: + # Expect to work + response = client.post( + "/dummy_opendp_query", json=example_dummy_opendp + ) + assert response.status_code == status.HTTP_200_OK + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["query_response"] > 0
    + + +
    +[docs] + def test_opendp_cost(self) -> None: + """test_opendp_cost""" + with TestClient(app) as client: + # Expect to work + response = client.post( + "/estimate_opendp_cost", json=example_opendp + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["epsilon_cost"] > 0.1 + assert response_dict["delta_cost"] == 0
    + + +
    +[docs] + def test_get_initial_budget(self) -> None: + """test_get_initial_budget""" + with TestClient(app, headers=self.headers) as client: + # Expect to work + response = client.post( + "/get_initial_budget", json=example_get_admin_db_data + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["initial_epsilon"] == INITAL_EPSILON + assert response_dict["initial_delta"] == INITIAL_DELTA + + # Query to spend budget + _ = client.post( + "/smartnoise_query", + json=example_smartnoise_sql, + headers=self.headers, + ) + + # Response should stay the same + response_2 = client.post( + "/get_initial_budget", json=example_get_admin_db_data + ) + assert response_2.status_code == status.HTTP_200_OK + response_dict_2 = json.loads(response_2.content.decode("utf8")) + assert response_dict_2 == response_dict
    + + +
    +[docs] + def test_get_total_spent_budget(self) -> None: + """test_get_total_spent_budget""" + with TestClient(app, headers=self.headers) as client: + # Expect to work + response = client.post( + "/get_total_spent_budget", json=example_get_admin_db_data + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["total_spent_epsilon"] == 0 + assert response_dict["total_spent_delta"] == 0 + + # Query to spend budget + _ = client.post( + "/smartnoise_query", + json=example_smartnoise_sql, + headers=self.headers, + ) + + # Response should have updated spent budget + response_2 = client.post( + "/get_total_spent_budget", json=example_get_admin_db_data + ) + assert response_2.status_code == status.HTTP_200_OK + + response_dict_2 = json.loads(response_2.content.decode("utf8")) + assert response_dict_2 != response_dict + assert ( + response_dict_2["total_spent_epsilon"] + == SMARTNOISE_QUERY_EPSILON + ) + assert ( + response_dict_2["total_spent_delta"] >= SMARTNOISE_QUERY_DELTA + )
    + + +
    +[docs] + def test_get_remaining_budget(self) -> None: + """test_get_remaining_budget""" + with TestClient(app, headers=self.headers) as client: + # Expect to work + response = client.post( + "/get_remaining_budget", json=example_get_admin_db_data + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["remaining_epsilon"] == INITAL_EPSILON + assert response_dict["remaining_delta"] == INITIAL_DELTA + + # Query to spend budget + _ = client.post( + "/smartnoise_query", + json=example_smartnoise_sql, + headers=self.headers, + ) + + # Response should have removed spent budget + response_2 = client.post( + "/get_remaining_budget", json=example_get_admin_db_data + ) + assert response_2.status_code == status.HTTP_200_OK + + response_dict_2 = json.loads(response_2.content.decode("utf8")) + assert response_dict_2 != response_dict + assert ( + response_dict_2["remaining_epsilon"] + == INITAL_EPSILON - SMARTNOISE_QUERY_EPSILON + ) + assert ( + response_dict_2["remaining_delta"] + <= INITIAL_DELTA - SMARTNOISE_QUERY_DELTA + )
    + + +
    +[docs] + def test_get_previous_queries(self) -> None: + """test_get_previous_queries""" + with TestClient(app, headers=self.headers) as client: + # Expect to work + response = client.post( + "/get_previous_queries", json=example_get_admin_db_data + ) + assert response.status_code == status.HTTP_200_OK + + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["previous_queries"] == [] + + # Query to archive 1 (smartnoise) + query_res = client.post( + "/smartnoise_query", + json=example_smartnoise_sql, + headers=self.headers, + ) + query_res = json.loads(query_res.content.decode("utf8")) + + # Response should have one element in list + response_2 = client.post( + "/get_previous_queries", json=example_get_admin_db_data + ) + assert response_2.status_code == status.HTTP_200_OK + + response_dict_2 = json.loads(response_2.content.decode("utf8")) + assert len(response_dict_2["previous_queries"]) == 1 + assert ( + response_dict_2["previous_queries"][0]["dp_librairy"] + == DPLibraries.SMARTNOISE_SQL + ) + assert ( + response_dict_2["previous_queries"][0]["client_input"] + == example_smartnoise_sql + ) + assert ( + response_dict_2["previous_queries"][0]["response"] == query_res + ) + + # Query to archive 2 (opendp) + query_res = client.post( + "/opendp_query", + json=example_opendp, + ) + query_res = json.loads(query_res.content.decode("utf8")) + + # Response should have two elements in list + response_3 = client.post( + "/get_previous_queries", json=example_get_admin_db_data + ) + assert response_3.status_code == status.HTTP_200_OK + + response_dict_3 = json.loads(response_3.content.decode("utf8")) + assert len(response_dict_3["previous_queries"]) == 2 + assert ( + response_dict_3["previous_queries"][0] + == response_dict_2["previous_queries"][0] + ) + assert ( + response_dict_3["previous_queries"][1]["dp_librairy"] + == DPLibraries.OPENDP + ) + assert ( + response_dict_3["previous_queries"][1]["client_input"] + == example_opendp + ) + assert ( + response_dict_3["previous_queries"][1]["response"] == query_res + )
    + + +
    +[docs] + def test_budget_over_limit(self) -> None: + """test_budget_over_limit""" + with TestClient(app, headers=self.headers) as client: + # Should fail: too much budget on one go + smartnoise_body = dict(example_smartnoise_sql) + smartnoise_body["epsilon"] = EPSILON_LIMIT * 2 + + response = client.post( + "/smartnoise_query", + json=smartnoise_body, + headers=self.headers, + ) + + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + error = response.json()["detail"][0] + assert error["type"] == "less_than_equal" + assert error["loc"] == ["body", "epsilon"] + assert error["msg"] == "Input should be less than or equal to 5"
    + + +
    +[docs] + def test_subsequent_budget_limit_logic(self) -> None: + """test_subsequent_budget_limit_logic""" + with TestClient(app, headers=self.headers) as client: + # Should fail: too much budget after three queries + smartnoise_body = dict(example_smartnoise_sql) + smartnoise_body["epsilon"] = 4.0 + + # spend 4.0 (total_spent = 4.0 <= INTIAL_BUDGET = 10.0) + response = client.post( + "/smartnoise_query", + json=smartnoise_body, + headers=self.headers, + ) + assert response.status_code == status.HTTP_200_OK + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + + # spend 2*4.0 (total_spent = 8.0 <= INTIAL_BUDGET = 10.0) + response = client.post( + "/smartnoise_query", + json=smartnoise_body, + headers=self.headers, + ) + assert response.status_code == status.HTTP_200_OK + response_dict = json.loads(response.content.decode("utf8")) + assert response_dict["requested_by"] == self.user_name + + # spend 3*4.0 (total_spent = 12.0 > INITIAL_BUDGET = 10.0) + response = client.post( + "/smartnoise_query", + json=smartnoise_body, + headers=self.headers, + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.json() == { + "InvalidQueryException": "Not enough budget for this query " + + "epsilon remaining 2.0, " + + "delta remaining 0.004970000100000034." + }
\ No newline at end of file
diff --git a/html/_modules/lomas_server/tests/test_mongodb_admin.html b/html/_modules/lomas_server/tests/test_mongodb_admin.html
new file mode 100644
index 00000000..c20a2289
--- /dev/null
+++ b/html/_modules/lomas_server/tests/test_mongodb_admin.html
@@ -0,0 +1,1219 @@
+ lomas_server.tests.test_mongodb_admin — Lomas 0.0.1 documentation

    Source code for lomas_server.tests.test_mongodb_admin

    +import os
    +import unittest
    +from types import SimpleNamespace
    +from typing import Dict
    +
    +import boto3
    +import yaml
    +from pymongo import MongoClient
    +
    +from admin_database.utils import get_mongodb_url
    +from mongodb_admin import (
    +    add_dataset,
    +    add_dataset_to_user,
    +    add_datasets_via_yaml,
    +    add_user,
    +    add_user_with_budget,
    +    add_users_via_yaml,
    +    del_dataset,
    +    del_dataset_to_user,
    +    del_user,
    +    drop_collection,
    +    get_list_of_datasets,
    +    get_list_of_datasets_from_user,
    +    get_list_of_users,
    +    set_budget_field,
    +    set_may_query,
    +    show_archives_of_user,
    +    show_collection,
    +    show_dataset,
    +    show_metadata_of_dataset,
    +    show_user,
    +)
    +from constants import PrivateDatabaseType
    +from tests.constants import (
    +    ENV_MONGO_INTEGRATION,
    +    ENV_S3_INTEGRATION,
    +    TRUE_VALUES,
    +    FALSE_VALUES,
    +)
    +from utils.config import CONFIG_LOADER, get_config
    +from utils.utils import add_demo_data_to_admindb
    +
    +
    +
    +[docs] +@unittest.skipIf( + ENV_MONGO_INTEGRATION not in os.environ + and os.getenv(ENV_MONGO_INTEGRATION, "0").lower() in FALSE_VALUES, + f"""Not an MongoDB integration test: {ENV_MONGO_INTEGRATION} + environment variable not set to True.""", +) +class TestMongoDBAdmin(unittest.TestCase): # pylint: disable=R0904 + """ + Tests for the functions in mongodb_admin.py. + + This is an integration test and requires a mongodb database + to be started before being executed. + + The test is only executed if the LOMAS_TEST_MONGO_INTEGRATION + environment variable is set to True. + """ + +
    +[docs] + @classmethod + def setUpClass(cls) -> None: + """Connection to database""" + CONFIG_LOADER.load_config( + config_path="tests/test_configs/test_config_mongo.yaml", + secrets_path="tests/test_configs/test_secrets.yaml", + ) + + db_args = SimpleNamespace(**vars(get_config().admin_database)) + db_url = get_mongodb_url(db_args) + cls.db = MongoClient(db_url)[db_args.db_name]
    + + +
    +[docs] + def tearDown(self) -> None: + """Drop all data from database""" + drop_collection(self.db, "metadata") + drop_collection(self.db, "datasets") + drop_collection(self.db, "users") + drop_collection(self.db, "queries_archives")
    + + +
    +[docs] + def test_add_user(self) -> None: + """Test adding a user""" + user = "Tintin" + + # Add user + add_user(self.db, user) + + expected_user = { + "user_name": user, + "may_query": True, + "datasets_list": [], + } + + user_found = self.db.users.find_one({"user_name": "Tintin"}) + del user_found["_id"] + + self.assertEqual(user_found, expected_user) + + # Adding existing user raises error + with self.assertRaises(ValueError): + add_user(self.db, user)
    + + +
    +[docs] + def test_add_user_wb(self) -> None: + """Test adding a user with a dataset""" + user = "Tintin" + dataset = "Bijoux de la Castafiore" + epsilon = 10 + delta = 0.02 + + add_user_with_budget(self.db, user, dataset, epsilon, delta) + expected_user = { # pylint: disable=duplicate-code + "user_name": user, + "may_query": True, + "datasets_list": [ + { + "dataset_name": dataset, + "initial_epsilon": epsilon, + "initial_delta": delta, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": user}) + del user_found["_id"] + + self.assertEqual(user_found, expected_user) + + # Adding budget to existing user should raise error + with self.assertRaises(ValueError): + add_user_with_budget(self.db, user, dataset, epsilon, delta)
    + + +
    +[docs] + def test_del_user(self) -> None: + """Test deleting a user""" + # Setup: add a user + user = "Tintin" + add_user(self.db, user) + + # Deleting user + del_user(self.db, user) + + expected_user = None + user_found = self.db.users.find_one({"user_name": user}) + self.assertEqual(user_found, expected_user) + + # Removing non-existing should raise error + with self.assertRaises(ValueError): + del_user(self.db, user)
    + + +
+[docs]
+    def test_add_dataset_to_user(self) -> None:
+        """Test add dataset to a user"""
+        user = "Tintin"
+        dataset = "Bijoux de la Castafiore"
+        epsilon = 10
+        delta = 0.02
+
+        add_user(self.db, user)
+        add_dataset_to_user(self.db, user, dataset, epsilon, delta)
+        expected_user = {
+            "user_name": user,
+            "may_query": True,
+            "datasets_list": [
+                {
+                    "dataset_name": dataset,
+                    "initial_epsilon": epsilon,
+                    "initial_delta": delta,
+                    "total_spent_epsilon": 0.0,
+                    "total_spent_delta": 0.0,
+                }
+            ],
+        }
+
+        user_found = self.db.users.find_one({"user_name": user})
+        del user_found["_id"]
+
+        assert user_found == expected_user
+
+        # Adding dataset to existing user with existing dataset should
+        # raise an error
+        epsilon = 20
+        with self.assertRaises(ValueError):
+            add_dataset_to_user(self.db, user, dataset, epsilon, delta)
+
+        # Adding dataset to non-existing user should raise an error
+        user = "Milou"
+        with self.assertRaises(ValueError):
+            add_dataset_to_user(self.db, user, dataset, epsilon, delta)
    + + +
+[docs]
+    def test_del_dataset_to_user(self) -> None:
+        """Test delete dataset from user"""
+        # Setup: add user with dataset
+        user = "Tintin"
+        dataset = "Bijoux de la Castafiore"
+        epsilon = 10
+        delta = 0.02
+
+        add_user_with_budget(self.db, user, dataset, epsilon, delta)
+
+        # Test dataset deletion
+        del_dataset_to_user(self.db, user, dataset)
+        expected_user = {
+            "user_name": user,
+            "may_query": True,
+            "datasets_list": [],
+        }
+        user_found = self.db.users.find_one({"user_name": user})
+        del user_found["_id"]
+
+        self.assertEqual(user_found, expected_user)
+
+        # Removing a dataset from a non-existent user should raise an error
+        user = "Milou"
+        with self.assertRaises(ValueError):
+            del_dataset_to_user(self.db, user, dataset)
+
+        # Removing a dataset not present in the user should raise an error
+        user = "Tintin"
+        dataset = "Bijoux de la Castafiore"
+        with self.assertRaises(Exception):
+            del_dataset_to_user(self.db, user, dataset)
    + + +
    +[docs] + def test_set_budget_field(self) -> None: + """Test setting a budget field""" + # Setup: add user with budget + user = "Tintin" + dataset = "Bijoux de la Castafiore" + epsilon = 10 + delta = 0.02 + + add_user_with_budget(self.db, user, dataset, epsilon, delta) + + # Updating budget should work + field = "initial_epsilon" + value = 15 + set_budget_field(self.db, user, dataset, field, value) + + expected_user = { + "user_name": user, + "may_query": True, + "datasets_list": [ + { + "dataset_name": dataset, + "initial_epsilon": value, + "initial_delta": delta, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": user}) + del user_found["_id"] + + self.assertEqual(user_found, expected_user) + + # Setting budget for non-existing user should fail + user = "Milou" + with self.assertRaises(ValueError): + set_budget_field(self.db, user, dataset, field, value) + + # Setting budget for non-existing dataset should fail + user = "Tintin" + dataset = "os de Milou" + with self.assertRaises(ValueError): + set_budget_field(self.db, user, dataset, field, value)
    + + +
    +[docs] + def test_set_may_query(self) -> None: + """Test set may query""" + # Setup: add user with budget + user = "Tintin" + dataset = "PENGUIN" + epsilon = 10 + delta = 0.02 + + add_user_with_budget(self.db, user, dataset, epsilon, delta) + + # Set may query + value = False + set_may_query(self.db, user, value) + + expected_user = { + "user_name": user, + "may_query": value, + "datasets_list": [ + { + "dataset_name": dataset, + "initial_epsilon": epsilon, + "initial_delta": delta, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": user}) + del user_found["_id"] + + self.assertEqual(user_found, expected_user) + + # Raises error when user does not exist + user = "Milou" + with self.assertRaises(ValueError): + set_may_query(self.db, user, value)
    + + +
    +[docs] + def test_show_user(self) -> None: + """Test show user""" + user = "Milou" + dataset = "os" + epsilon = 20 + delta = 0.005 + add_user_with_budget(self.db, user, dataset, epsilon, delta) + user_found = show_user(self.db, "Milou") + expected_user = { + "user_name": user, + "may_query": True, + "datasets_list": [ + { + "dataset_name": dataset, + "initial_epsilon": epsilon, + "initial_delta": delta, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + self.assertEqual(user_found, expected_user) + + with self.assertRaises(ValueError): + user_found = show_user(self.db, "Bianca Castafiore")
    + + +
    +[docs] + def test_add_users_via_yaml(self) -> None: + """Test create user collection via YAML file""" + # Adding two users + path = "./tests/test_data/test_user_collection.yaml" + clean = False + overwrite = False + add_users_via_yaml(self.db, path, clean, overwrite) + + tintin = { + "user_name": "Tintin", + "may_query": True, + "datasets_list": [ + { + "dataset_name": "Bijoux de la Castafiore", + "initial_epsilon": 10, + "initial_delta": 0.005, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": "Tintin"}) + del user_found["_id"] + + self.assertEqual(user_found, tintin) + + milou = { + "user_name": "Milou", + "may_query": True, + "datasets_list": [ + { + "dataset_name": "os", + "initial_epsilon": 20, + "initial_delta": 0.005, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": "Milou"}) + del user_found["_id"] + + self.assertEqual(user_found, milou) + + # Check cleaning + user = "Tintin" + field = "initial_epsilon" + value = 25.0 + dataset = "Bijoux de la Castafiore" + set_budget_field(self.db, user, dataset, field, value) + + clean = True + add_users_via_yaml(self.db, path, clean, overwrite) + + user_found = self.db.users.find_one({"user_name": "Tintin"}) + del user_found["_id"] + self.assertEqual(user_found, tintin) + + user_found = self.db.users.find_one({"user_name": "Milou"}) + del user_found["_id"] + self.assertEqual(user_found, milou) + + # Check overwriting (with new user) + user = "Tintin" + field = "initial_epsilon" + value = False + dataset = "Bijoux de la Castafiore" + set_budget_field(self.db, user, dataset, field, value) + + user = "Milou" + del_user(self.db, user) + add_users_via_yaml(self.db, path, clean=False, overwrite=True) + + user_found = self.db.users.find_one({"user_name": "Tintin"}) + del user_found["_id"] + self.assertEqual(user_found, tintin) + + user_found = self.db.users.find_one({"user_name": "Milou"}) + del user_found["_id"] + self.assertEqual(user_found, milou) + + # Overwrite to false and existing users should warn + with self.assertWarns(UserWarning): + add_users_via_yaml(self.db, path, clean=False, overwrite=False)
    + + +
    +[docs] + def test_show_archives_of_user(self) -> None: + """Test show archives of user""" + add_user(self.db, "Milou") + add_user(self.db, "Tintin") + + # User exist but empty + archives_found = show_archives_of_user(self.db, "Milou") + expected_archives: list[Dict] = [] + self.assertEqual(archives_found, expected_archives) + + # User does not exist + with self.assertRaises(ValueError): + archives_found = show_archives_of_user( + self.db, "Bianca Castafiore" + ) + + # Add archives for Tintin and Dr. Antartica + path = "./tests/test_data/test_archives_collection.yaml" + with open(path, encoding="utf-8") as f: + archives = yaml.safe_load(f) + self.db.queries_archives.insert_many(archives) + + # Milou still empty + archives_found = show_archives_of_user(self.db, "Milou") + expected_archives = [] + self.assertEqual(archives_found, expected_archives) + + # Tintin has archives + archives_found = show_archives_of_user(self.db, "Tintin")[0] + expected_archives = archives[1] + + archives_found.pop("_id") + if isinstance(expected_archives, dict): + expected_archives.pop("_id") + + self.assertEqual(archives_found, expected_archives)
    + + +
    +[docs] + def test_get_list_of_users(self) -> None: + """Test get list of users""" + users_list = get_list_of_users(self.db) + self.assertEqual(users_list, []) + + dataset = "Bijoux de la Castafiore" + epsilon = 0.1 + delta = 0.0001 + add_user(self.db, "Bianca Castafiore") + add_user_with_budget(self.db, "Tintin", dataset, epsilon, delta) + add_user_with_budget(self.db, "Milou", dataset, epsilon, delta) + users_list = get_list_of_users(self.db) + self.assertEqual(users_list, ["Bianca Castafiore", "Tintin", "Milou"])
    + + +
    +[docs] + def test_get_list_of_datasets_from_users(self) -> None: + """Test get list of datasets from users""" + user = "Bianca Castafiore" + add_user(self.db, user) + + users_list = get_list_of_datasets_from_user(self.db, user) + self.assertEqual(users_list, []) + + epsilon = 0.1 + delta = 0.0001 + add_dataset_to_user( + self.db, user, "Bijoux de la Castafiore", epsilon, delta + ) + add_dataset_to_user( + self.db, user, "Le Sceptre d'Ottokar", epsilon, delta + ) + add_dataset_to_user( + self.db, user, "Les Sept Boules de cristal", epsilon, delta + ) + add_user_with_budget(self.db, "Milou", "os", 0.1, 0.001) + + dataset_list = get_list_of_datasets_from_user(self.db, user) + self.assertEqual( + dataset_list, + [ + "Bijoux de la Castafiore", + "Le Sceptre d'Ottokar", + "Les Sept Boules de cristal", + ], + ) + + dataset_list = get_list_of_datasets_from_user(self.db, "Milou") + self.assertEqual(dataset_list, ["os"])
    + + +
    +[docs] + def test_add_local_dataset(self) -> None: + """Test adding a local dataset""" + dataset = "PENGUIN" + database_type = PrivateDatabaseType.PATH + dataset_path = "some_path" + metadata_database_type = PrivateDatabaseType.PATH + metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml" + + add_dataset( + self.db, + dataset, + database_type, + metadata_database_type, + dataset_path=dataset_path, + metadata_path=metadata_path, + ) + + expected_dataset = { + "dataset_name": dataset, + "database_type": database_type, + "dataset_path": dataset_path, + "metadata": { + "database_type": metadata_database_type, + "metadata_path": metadata_path, + }, + } + with open( + "./tests/test_data/metadata/penguin_metadata.yaml", + encoding="utf-8", + ) as f: + expected_metadata = yaml.safe_load(f) + + dataset_found = self.db.datasets.find_one({"dataset_name": "PENGUIN"}) + del dataset_found["_id"] + self.assertEqual(dataset_found, expected_dataset) + + metadata_found = self.db.metadata.find_one( + {dataset: {"$exists": True}} + )[dataset] + self.assertEqual(metadata_found, expected_metadata) + + # Add already present dataset + with self.assertRaises(ValueError): + add_dataset( + self.db, + dataset, + database_type=database_type, + metadata_database_type=metadata_database_type, + dataset_path=dataset_path, + metadata_path=metadata_path, + ) + + # Add not already present dataset but present metadata + drop_collection(self.db, "datasets") + with self.assertRaises(ValueError): + add_dataset( + self.db, + dataset, + database_type=database_type, + metadata_database_type=metadata_database_type, + dataset_path=dataset_path, + metadata_path=metadata_path, + ) + + # Restart clean + drop_collection(self.db, "metadata") + drop_collection(self.db, "datasets") + + # Unknown database type for dataset + with self.assertRaises(ValueError): + add_dataset( + self.db, + dataset, + database_type="type_that_does_not_exist", + metadata_database_type=metadata_database_type, + dataset_path=dataset_path, + metadata_path=metadata_path, + ) + + # Unknown database type for metadata + with self.assertRaises(ValueError): + add_dataset( + self.db, + dataset, + database_type=database_type, + metadata_database_type="type_that_does_not_exist", + dataset_path=dataset_path, + metadata_path=metadata_path, + )
    + + +
    +[docs] + @unittest.skipIf( + ENV_S3_INTEGRATION not in os.environ + and os.getenv(ENV_S3_INTEGRATION, "0").lower() in FALSE_VALUES, + f"""Not an S3 integration test: {ENV_S3_INTEGRATION} + environment variable not set to True.""", + ) + def test_add_s3_dataset(self) -> None: # pylint: disable=R0914 + """Test adding a dataset stored on S3""" + dataset = "TINTIN_S3_TEST" + database_type = PrivateDatabaseType.S3 + metadata_database_type = PrivateDatabaseType.S3 + s3_bucket = "example" + endpoint_url = "http://localhost:9000" + aws_access_key_id = "admin" + aws_secret_access_key = "admin123" + s3_key_file = "data/test_penguin.csv" + s3_key_metadata = "metadata/penguin_metadata.yaml" + + add_dataset( + self.db, + dataset, + database_type, + metadata_database_type, + s3_bucket=s3_bucket, + s3_key=s3_key_file, + endpoint_url=endpoint_url, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + metadata_s3_bucket=s3_bucket, + metadata_s3_key=s3_key_metadata, + metadata_endpoint_url=endpoint_url, + metadata_aws_access_key_id=aws_access_key_id, + metadata_aws_secret_access_key=aws_secret_access_key, + ) + + # Check dataset collection + expected_dataset = { + "dataset_name": dataset, + "database_type": database_type, + "s3_bucket": s3_bucket, + "s3_key": s3_key_file, + "endpoint_url": endpoint_url, + "aws_access_key_id": aws_access_key_id, + "aws_secret_access_key": aws_secret_access_key, + "metadata": { + "database_type": metadata_database_type, + "s3_bucket": s3_bucket, + "s3_key": s3_key_metadata, + "endpoint_url": endpoint_url, + "aws_access_key_id": aws_access_key_id, + "aws_secret_access_key": aws_secret_access_key, + }, + } + + dataset_found = self.db.datasets.find_one({"dataset_name": dataset}) + del dataset_found["_id"] + self.assertEqual(dataset_found, expected_dataset) + + # Check metadata collection + s3_client = boto3.client( + "s3", + endpoint_url=endpoint_url, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + response = s3_client.get_object(Bucket=s3_bucket, Key=s3_key_metadata) + expected_metadata = yaml.safe_load(response["Body"]) + + metadata_found = self.db.metadata.find_one( + {dataset: {"$exists": True}} + )[dataset] + self.assertEqual(metadata_found, expected_metadata)
    + + +
    +[docs] + def test_add_datasets_via_yaml(self) -> None: + """Test add datasets via a YAML file""" + # Load reference data + with open( + "./tests/test_data/test_datasets.yaml", + encoding="utf-8", + ) as f: + datasets = yaml.safe_load(f) + penguin = datasets["datasets"][0] + iris = datasets["datasets"][1] + + with open( + "./tests/test_data/metadata/penguin_metadata.yaml", + encoding="utf-8", + ) as f: + penguin_metadata = yaml.safe_load(f) + + def verify_datasets(): + # Check penguin and iris are in db + penguin_found = self.db.datasets.find_one( + {"dataset_name": "PENGUIN"} + ) + del penguin_found["_id"] + self.assertEqual(penguin_found, penguin) + + metadata_found = self.db.metadata.find_one( + {"PENGUIN": {"$exists": True}} + )["PENGUIN"] + self.assertEqual(metadata_found, penguin_metadata) + + iris_found = self.db.datasets.find_one({"dataset_name": "IRIS"}) + del iris_found["_id"] + self.assertEqual(iris_found, iris) + + metadata_found = self.db.metadata.find_one( + {"IRIS": {"$exists": True}} + )["IRIS"] + self.assertEqual(metadata_found, penguin_metadata) + + path = "./tests/test_data/test_datasets.yaml" + clean = False + overwrite_datasets = False + overwrite_metadata = False + + add_datasets_via_yaml( + self.db, path, clean, overwrite_datasets, overwrite_metadata + ) + + verify_datasets() + + # Check clean works + + # Add new dataset and then add_datasets_via_yaml with clean option + self.db.datasets.insert_one( + {"dataset_name": "Les aventures de Tintin"} + ) + + clean = True + add_datasets_via_yaml( + self.db, path, clean, overwrite_datasets, overwrite_metadata + ) + verify_datasets() + + # Check no overwrite triggers warning + clean = False + with self.assertWarns(UserWarning): + add_datasets_via_yaml( + self.db, path, clean, overwrite_datasets, overwrite_metadata + ) + + # Check overwrite works + self.db.datasets.update_one( + {"dataset_name": "IRIS"}, {"$set": {"dataset_name": "IRIS"}} + ) + + overwrite_datasets = True + add_datasets_via_yaml( + self.db, path, clean, overwrite_datasets, overwrite_metadata + ) + verify_datasets() + + # Check no clean and overwrite metadata + add_datasets_via_yaml( + self.db, + path, + clean=False, + overwrite_datasets=True, + overwrite_metadata=True, + ) + verify_datasets()
    + + +
    +[docs] + @unittest.skipIf( + ENV_S3_INTEGRATION not in os.environ + and os.getenv(ENV_S3_INTEGRATION, "0").lower() in FALSE_VALUES, + f"""Not an S3 integration test: {ENV_S3_INTEGRATION} + environment variable not set to True.""", + ) + def test_add_s3_datasets_via_yaml(self) -> None: + """Test add datasets via a YAML file""" + # Load reference data + dataset_path = "./tests/test_data/test_datasets_with_s3.yaml" + with open( + dataset_path, + encoding="utf-8", + ) as f: + datasets = yaml.safe_load(f) + tintin = datasets["datasets"][2] + + with open( + "./tests/test_data/metadata/penguin_metadata.yaml", + encoding="utf-8", + ) as f: + tintin_metadata = yaml.safe_load(f) + + clean = False + overwrite_datasets = False + overwrite_metadata = False + + add_datasets_via_yaml( + self.db, + dataset_path, + clean, + overwrite_datasets, + overwrite_metadata, + ) + + tintin_found = self.db.datasets.find_one( + {"dataset_name": "TINTIN_S3_TEST"} + ) + del tintin_found["_id"] + self.assertEqual(tintin_found, tintin) + + metadata_found = self.db.metadata.find_one( + {"TINTIN_S3_TEST": {"$exists": True}} + )["TINTIN_S3_TEST"] + self.assertEqual(metadata_found, tintin_metadata)
    + + +
    +[docs] + def test_del_dataset(self) -> None: + """Test dataset deletion""" + # Setup: add one dataset + dataset = "PENGUIN" + database_type = PrivateDatabaseType.PATH + dataset_path = "some_path" + metadata_database_type = PrivateDatabaseType.PATH + metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml" + + add_dataset( + self.db, + dataset, + database_type, + metadata_database_type, + dataset_path=dataset_path, + metadata_path=metadata_path, + ) + + # Verify delete works + del_dataset(self.db, dataset) + + dataset_found = self.db.datasets.find_one({"dataset_name": "PENGUIN"}) + self.assertEqual(dataset_found, None) + + nb_metadata = self.db.metadata.count_documents({}) + self.assertEqual(nb_metadata, 0) + + # Delete non-existing dataset should trigger decorator error + with self.assertRaises(ValueError): + del_dataset(self.db, dataset) + + # Delete dataset with non-existing metadata should trigger decorator error + add_dataset( + self.db, + dataset, + database_type, + metadata_database_type, + dataset_path=dataset_path, + metadata_path=metadata_path, + ) + self.db.metadata.delete_many({dataset: {"$exists": True}}) + with self.assertRaises(ValueError): + del_dataset(self.db, dataset)
    + + +
    +[docs] + def test_show_dataset(self) -> None: + """Test show dataset""" + with self.assertRaises(ValueError): + dataset_found = show_dataset(self.db, "PENGUIN") + + dataset = "PENGUIN" + database_type = PrivateDatabaseType.PATH + dataset_path = "some_path" + metadata_database_type = PrivateDatabaseType.PATH + metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml" + + add_dataset( + self.db, + dataset, + database_type, + metadata_database_type, + dataset_path=dataset_path, + metadata_path=metadata_path, + ) + dataset_found = show_dataset(self.db, "PENGUIN") + expected_dataset = { + "dataset_name": dataset, + "database_type": database_type, + "dataset_path": dataset_path, + "metadata": { + "database_type": metadata_database_type, + "metadata_path": metadata_path, + }, + } + self.assertEqual(dataset_found, expected_dataset)
    + + +
    +[docs] + def test_show_metadata_of_dataset(self) -> None: + """Test show metadata_dataset""" + with self.assertRaises(ValueError): + metadata_found = show_metadata_of_dataset(self.db, "PENGUIN") + + dataset = "PENGUIN" + database_type = PrivateDatabaseType.PATH + dataset_path = "some_path" + metadata_database_type = PrivateDatabaseType.PATH + metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml" + + add_dataset( + self.db, + dataset, + database_type, + metadata_database_type, + dataset_path=dataset_path, + metadata_path=metadata_path, + ) + metadata_found = show_metadata_of_dataset(self.db, "PENGUIN") + with open(metadata_path, encoding="utf-8") as f: + expected_metadata = yaml.safe_load(f) + self.assertEqual(metadata_found, expected_metadata)
    + + +
    +[docs] + def test_get_list_of_datasets(self) -> None: + """Test get list of datasets""" + list_datasets = get_list_of_datasets(self.db) + self.assertEqual(list_datasets, []) + + path = "./tests/test_data/test_datasets.yaml" + clean = False + overwrite_datasets = False + overwrite_metadata = False + + add_datasets_via_yaml( + self.db, path, clean, overwrite_datasets, overwrite_metadata + ) + list_datasets = get_list_of_datasets(self.db) + self.assertEqual(list_datasets, ["PENGUIN", "IRIS"])
    + + +
    +[docs] + def test_drop_collection(self) -> None: + """Test drop collection from db""" + # Setup: add one dataset + dataset = "PENGUIN" + database_type = PrivateDatabaseType.PATH + dataset_path = "some_path" + metadata_database_type = PrivateDatabaseType.PATH + metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml" + + add_dataset( + self.db, + dataset, + database_type, + metadata_database_type, + dataset_path=dataset_path, + metadata_path=metadata_path, + ) + + # Test + collection = "datasets" + drop_collection(self.db, collection) + + nb_datasets = self.db.datasets.count_documents({}) + self.assertEqual(nb_datasets, 0)
    + + +
    +[docs] + def test_show_collection(self) -> None: + """Test show collection from db""" + dataset_collection = show_collection(self.db, "datasets") + self.assertEqual(dataset_collection, []) + + path = "./tests/test_data/test_datasets.yaml" + clean = False + overwrite_datasets = False + overwrite_metadata = False + add_datasets_via_yaml( + self.db, path, clean, overwrite_datasets, overwrite_metadata + ) + with open(path, encoding="utf-8") as f: + expected_dataset_collection = yaml.safe_load(f) + dataset_collection = show_collection(self.db, "datasets") + self.assertEqual( + expected_dataset_collection["datasets"], dataset_collection + )
    + + +
    +[docs] + def test_add_demo_data_to_admindb(self) -> None: + """Test add demo data to admin db""" + + if os.getenv(ENV_S3_INTEGRATION, "0").lower() in TRUE_VALUES: + dataset_yaml = "tests/test_data/test_datasets_with_s3.yaml" + else: + dataset_yaml = "tests/test_data/test_datasets.yaml" + + add_demo_data_to_admindb( + user_yaml="./tests/test_data/test_user_collection.yaml", + dataset_yaml=dataset_yaml, + ) + + users_list = get_list_of_users(self.db) + self.assertEqual(users_list, ["Dr. Antartica", "Tintin", "Milou"]) + + list_datasets = get_list_of_datasets(self.db) + + if os.getenv(ENV_S3_INTEGRATION, "0").lower() in TRUE_VALUES: + self.assertEqual( + list_datasets, ["PENGUIN", "IRIS", "TINTIN_S3_TEST"] + ) + else: + self.assertEqual(list_datasets, ["PENGUIN", "IRIS"])
\ No newline at end of file
diff --git a/html/_modules/lomas_server/tests/test_mongodb_admin_cli.html b/html/_modules/lomas_server/tests/test_mongodb_admin_cli.html
new file mode 100644
index 00000000..119d2777
--- /dev/null
+++ b/html/_modules/lomas_server/tests/test_mongodb_admin_cli.html
@@ -0,0 +1,900 @@
+ lomas_server.tests.test_mongodb_admin_cli — Lomas 0.0.1 documentation

    Source code for lomas_server.tests.test_mongodb_admin_cli

    +import os
    +import subprocess
    +import unittest
    +from types import SimpleNamespace
    +from typing import List
    +
    +import yaml
    +from pymongo import MongoClient
    +
    +from admin_database.utils import get_mongodb_url
    +from constants import PrivateDatabaseType
    +from tests.constants import ENV_MONGO_INTEGRATION
    +from utils.config import CONFIG_LOADER, get_config
    +
    +
    +
+[docs] +@unittest.skipIf( + ENV_MONGO_INTEGRATION not in os.environ + or os.getenv(ENV_MONGO_INTEGRATION, "0").lower() in ("false", "0", "f"), + f"""Not a MongoDB integration test: {ENV_MONGO_INTEGRATION} + environment variable not set to True.""", +) +class TestMongoDBAdmin(unittest.TestCase): # pylint: disable=R0904 + """ + Tests for the CLI commands in mongodb_admin_cli.py. + + This is an integration test and requires a MongoDB database + to be started before being executed. + + The tests are only executed if the LOMAS_TEST_MONGO_INTEGRATION + environment variable is set to True. + """ +
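A minimal sketch of how one might switch these integration tests on, assuming the module is importable as tests.test_mongodb_admin_cli and that "True" is among the accepted truthy values (both assumptions, not taken from this diff); a MongoDB instance matching tests/test_configs/test_config_mongo.yaml must already be running:

import os
import unittest

# The flag must be truthy before this module is imported, because the
# skipIf decorator above is evaluated at import time.
os.environ["LOMAS_TEST_MONGO_INTEGRATION"] = "True"

from tests import test_mongodb_admin_cli  # assumed import path

# Run only this test module against the already-running MongoDB instance.
unittest.main(module=test_mongodb_admin_cli, exit=False)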
    +[docs] + @classmethod + def setUpClass(cls) -> None: + """Connection to database""" + CONFIG_LOADER.load_config( + config_path="tests/test_configs/test_config_mongo.yaml", + secrets_path="tests/test_configs/test_secrets.yaml", + ) + + # Access to MongoDB + db_args = SimpleNamespace(**vars(get_config().admin_database)) + db_url = get_mongodb_url(db_args) + cls.db = MongoClient(db_url)[db_args.db_name] + + # CLI args to connect to DB + admin_config = get_config().admin_database + cls.db_connection_cli = [ + "--username", + admin_config.username, + "--password", + admin_config.password, + "--address", + admin_config.address, + "--port", + str(admin_config.port), + "--db_name", + admin_config.db_name, + ]
    + + +
    +[docs] + def tearDown(self) -> None: + """Drop all data from database""" + self.run_cli_command("drop_collection", ["--collection", "metadata"]) + self.run_cli_command("drop_collection", ["--collection", "datasets"]) + self.run_cli_command("drop_collection", ["--collection", "users"]) + self.run_cli_command( + "drop_collection", ["--collection", "queries_archives"] + )
    + + +
    +[docs] + def run_cli_command(self, command: str, args: List) -> None: + """Run a MongoDB administration CLI command. + + Args: + command (str): The subcommand to run. + args (List[str]): A list of arguments for the subcommand. + + Raises: + ValueError: If the command returns a non-zero exit status. + """ + str_args = [str(arg) for arg in args] + + cli_command = ( + ["python", "mongodb_admin_cli.py", command] + + self.db_connection_cli + + str_args + ) + try: + subprocess.run( + cli_command, capture_output=True, text=True, check=True + ) + except subprocess.CalledProcessError as e: + error_message = ( + f"Command: {cli_command}\n" + f"Return Code: {e.returncode}\n" + f"Output: {e.output.strip()}\n" + f"Error: {e.stderr.strip()}" + ) + raise ValueError(error_message) from e
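To make the wrapper concrete: a call such as self.run_cli_command("add_user", ["--user", "Tintin"]) assembles roughly the argument list sketched below before handing it to subprocess.run; the connection flags mirror cls.db_connection_cli from setUpClass, and the credential values are placeholders rather than real configuration.

# Illustrative only; placeholders stand in for the admin_database config.
cli_command = [
    "python", "mongodb_admin_cli.py", "add_user",
    "--username", "<admin_user>",        # placeholder
    "--password", "<admin_password>",    # placeholder
    "--address", "<mongodb_address>",    # placeholder
    "--port", "27017",                   # placeholder
    "--db_name", "<db_name>",            # placeholder
    "--user", "Tintin",
]
# subprocess.run(cli_command, capture_output=True, text=True, check=True)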
    + + +
    +[docs] + def test_add_user_cli(self) -> None: + """Test adding a user via cli""" + user = "Tintin" + + # Add user + self.run_cli_command("add_user", ["--user", user]) + + expected_user = { + "user_name": user, + "may_query": True, + "datasets_list": [], + } + + user_found = self.db.users.find_one({"user_name": "Tintin"}) + del user_found["_id"] + self.assertEqual(user_found, expected_user) + + # Adding existing user raises error + with self.assertRaises(ValueError): + self.run_cli_command("add_user", ["-u", user]) + + # Not giving required argument + with self.assertRaises(ValueError): + self.run_cli_command("add_user", ["--nope", "willfail"])
    + + +
    +[docs] + def test_add_user_wb_cli(self) -> None: + """Test adding a user with a dataset via cli""" + user = "Tintin" + dataset = "Bijoux de la Castafiore" + epsilon = 10.0 + delta = 0.02 + + self.run_cli_command( + "add_user_with_budget", + [ + "--user", + user, + "--dataset", + dataset, + "--epsilon", + epsilon, + "--delta", + delta, + ], + ) + + expected_user = { # pylint: disable=duplicate-code + "user_name": user, + "may_query": True, + "datasets_list": [ + { + "dataset_name": dataset, + "initial_epsilon": epsilon, + "initial_delta": delta, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": user}) + del user_found["_id"] + self.assertEqual(user_found, expected_user) + + # Adding budget to existing user should raise error + with self.assertRaises(ValueError): + self.run_cli_command( + "add_user_with_budget", + ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta], + )
    + + +
+[docs] + def test_del_user_cli(self) -> None: + """Test deleting a user via cli""" + # Setup: add a user + user = "Tintin" + self.run_cli_command("add_user", ["--user", user]) + + # Deleting user + self.run_cli_command("del_user", ["--user", user]) + + expected_user = None + user_found = self.db.users.find_one({"user_name": user}) + self.assertEqual(user_found, expected_user) + + # Removing a non-existing user should raise an error + with self.assertRaises(ValueError): + self.run_cli_command("del_user", ["--user", user])
    + + +
+[docs] + def test_add_dataset_to_user_cli(self) -> None: + """Test add dataset to a user via cli""" + user = "Tintin" + dataset = "Bijoux de la Castafiore" + epsilon = 10.0 + delta = 0.02 + + self.run_cli_command("add_user", ["--user", user]) + self.run_cli_command( + "add_dataset_to_user", + ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta], + ) + expected_user = { + "user_name": user, + "may_query": True, + "datasets_list": [ + { + "dataset_name": dataset, + "initial_epsilon": epsilon, + "initial_delta": delta, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": user}) + del user_found["_id"] + + assert user_found == expected_user + + # Adding dataset to existing user with existing dataset should + # raise an error + epsilon = 20.0 + with self.assertRaises(ValueError): + self.run_cli_command( + "add_dataset_to_user", + ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta], + ) + + # Adding dataset to non-existing user should raise an error + user = "Milou" + with self.assertRaises(ValueError): + self.run_cli_command( + "add_dataset_to_user", + ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta], + )
    + + +
+[docs] + def test_del_dataset_to_user_cli(self) -> None: + """Test delete dataset from user via cli""" + # Setup: add user with dataset + user = "Tintin" + dataset = "Bijoux de la Castafiore" + epsilon = 10.0 + delta = 0.02 + + self.run_cli_command( + "add_user_with_budget", + ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta], + ) + + # Test dataset deletion + self.run_cli_command( + "del_dataset_to_user", ["-u", user, "-d", dataset] + ) + expected_user = { + "user_name": user, + "may_query": True, + "datasets_list": [], + } + user_found = self.db.users.find_one({"user_name": user}) + del user_found["_id"] + self.assertEqual(user_found, expected_user) + + # Removing a dataset from a non-existent user should raise an error + user = "Milou" + with self.assertRaises(ValueError): + self.run_cli_command( + "del_dataset_to_user", ["-u", user, "-d", dataset] + ) + + # Removing a dataset the user does not have should raise an error + user = "Tintin" + dataset = "Bijoux de la Castafiore" + with self.assertRaises(Exception): + self.run_cli_command( + "del_dataset_to_user", ["-u", user, "-d", dataset] + )
    + + +
    +[docs] + def test_set_budget_field_cli(self) -> None: + """Test setting a budget field via cli""" + # Setup: add user with budget + user = "Tintin" + dataset = "Bijoux de la Castafiore" + epsilon = 10.0 + delta = 0.02 + + self.run_cli_command( + "add_user_with_budget", + ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta], + ) + + # Updating budget should work + field = "initial_epsilon" + value = 15 + self.run_cli_command( + "set_budget_field", + ["-u", user, "-d", dataset, "-f", field, "-v", value], + ) + + expected_user = { + "user_name": user, + "may_query": True, + "datasets_list": [ + { + "dataset_name": dataset, + "initial_epsilon": value, + "initial_delta": delta, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": user}) + del user_found["_id"] + + self.assertEqual(user_found, expected_user) + + # Setting budget for non-existing user should fail + user = "Milou" + with self.assertRaises(ValueError): + self.run_cli_command( + "set_budget_field", + ["-u", user, "-d", dataset, "-f", field, "-v", value], + ) + + # Setting budget for non-existing dataset should fail + user = "Tintin" + dataset = "os de Milou" + with self.assertRaises(ValueError): + self.run_cli_command( + "set_budget_field", + ["-u", user, "-d", dataset, "-f", field, "-v", value], + )
    + + +
    +[docs] + def test_set_may_query_cli(self) -> None: + """Test set may query via cli""" + # Setup: add user with budget + user = "Tintin" + dataset = "PENGUIN" + epsilon = 10.0 + delta = 0.02 + + self.run_cli_command( + "add_user_with_budget", + ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta], + ) + + # Set may query + self.run_cli_command("set_may_query", ["-u", user, "-v", "False"]) + + expected_user = { + "user_name": user, + "may_query": False, + "datasets_list": [ + { + "dataset_name": dataset, + "initial_epsilon": epsilon, + "initial_delta": delta, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": user}) + del user_found["_id"] + self.assertEqual(user_found, expected_user) + + # Raises error when user does not exist + user = "Milou" + with self.assertRaises(ValueError): + self.run_cli_command("set_may_query", ["-u", user, "-v", "False"])
    + + +
+[docs] + def test_show_user_cli(self) -> None: + """Test show user via CLI + Does not verify the output for now + """ + user = "Milou" + self.run_cli_command("add_user", ["-u", user]) + self.run_cli_command("show_user", ["-u", user])
    + + +
    +[docs] + def test_add_users_via_yaml_cli(self) -> None: + """Test create user collection via YAML file via cli""" + # Adding two users + path = "./tests/test_data/test_user_collection.yaml" + self.run_cli_command("add_users_via_yaml", ["-yf", path]) + + tintin = { + "user_name": "Tintin", + "may_query": True, + "datasets_list": [ + { + "dataset_name": "Bijoux de la Castafiore", + "initial_epsilon": 10, + "initial_delta": 0.005, + "total_spent_epsilon": 0.0, + "total_spent_delta": 0.0, + } + ], + } + + user_found = self.db.users.find_one({"user_name": "Tintin"}) + del user_found["_id"] + + self.assertEqual(user_found, tintin)
    + + +
+[docs] + def test_show_archives_of_user_cli(self) -> None: + """Test show archives of user via CLI + Does not verify the output for now + """ + user = "Milou" + self.run_cli_command("add_user", ["-u", user]) + self.run_cli_command("show_archives_of_user", ["-u", user])
    + + +
+[docs] + def test_get_list_of_users_cli(self) -> None: + """Test get list of users via CLI + Does not verify the output for now + """ + self.run_cli_command("get_list_of_users", []) + + self.run_cli_command("add_user", ["-u", "Milou"]) + self.run_cli_command("add_user", ["-u", "Tintin"]) + self.run_cli_command("get_list_of_users", [])
    + + +
+[docs] + def test_get_list_of_datasets_from_user_cli(self) -> None: + """Test get list of datasets from user via CLI + Does not verify the output for now + """ + user = "Milou" + self.run_cli_command("add_user", ["-u", user]) + self.run_cli_command("get_list_of_datasets_from_user", ["-u", user]) + + self.run_cli_command( + "add_dataset_to_user", + ["-u", user, "-d", "os", "-e", 0.1, "-del", 0.001], + ) + self.run_cli_command("get_list_of_datasets_from_user", ["-u", user])
    + + +
    +[docs] + def test_add_local_dataset_cli(self) -> None: + """Test adding a local dataset via cli""" + dataset = "PENGUIN" + database_type = PrivateDatabaseType.PATH + dataset_path = "some_path" + metadata_database_type = PrivateDatabaseType.PATH + metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml" + + self.run_cli_command( + "add_dataset", + [ + "--dataset_name", + dataset, + "--database_type", + database_type, + "--dataset_path", + dataset_path, + "--metadata_database_type", + metadata_database_type, + "--metadata_path", + metadata_path, + ], + ) + + expected_dataset = { + "dataset_name": dataset, + "database_type": database_type, + "dataset_path": dataset_path, + "metadata": { + "database_type": metadata_database_type, + "metadata_path": metadata_path, + }, + } + with open( + "./tests/test_data/metadata/penguin_metadata.yaml", + encoding="utf-8", + ) as f: + expected_metadata = yaml.safe_load(f) + + dataset_found = self.db.datasets.find_one({"dataset_name": "PENGUIN"}) + del dataset_found["_id"] + self.assertEqual(dataset_found, expected_dataset) + + metadata_found = self.db.metadata.find_one( + {dataset: {"$exists": True}} + )[dataset] + self.assertEqual(metadata_found, expected_metadata)
    + + +
    +[docs] + def test_add_datasets_via_yaml_cli(self) -> None: + """Test add datasets via a YAML file via cli""" + # Load reference data + with open( + "./tests/test_data/test_datasets.yaml", + encoding="utf-8", + ) as f: + datasets = yaml.safe_load(f) + penguin = datasets["datasets"][0] + iris = datasets["datasets"][1] + + with open( + "./tests/test_data/metadata/penguin_metadata.yaml", + encoding="utf-8", + ) as f: + penguin_metadata = yaml.safe_load(f) + + def verify_datasets(): + # Check penguin and iris are in db + penguin_found = self.db.datasets.find_one( + {"dataset_name": "PENGUIN"} + ) + del penguin_found["_id"] + self.assertEqual(penguin_found, penguin) + + metadata_found = self.db.metadata.find_one( + {"PENGUIN": {"$exists": True}} + )["PENGUIN"] + self.assertEqual(metadata_found, penguin_metadata) + + iris_found = self.db.datasets.find_one({"dataset_name": "IRIS"}) + del iris_found["_id"] + self.assertEqual(iris_found, iris) + + metadata_found = self.db.metadata.find_one( + {"IRIS": {"$exists": True}} + )["IRIS"] + self.assertEqual(metadata_found, penguin_metadata) + + path = "./tests/test_data/test_datasets.yaml" + + self.run_cli_command("add_datasets_via_yaml", ["--yaml_file", path]) + verify_datasets()
    + + +
    +[docs] + def test_del_dataset_cli(self) -> None: + """Test dataset deletion via cli""" + # Setup: add one dataset + dataset = "PENGUIN" + database_type = PrivateDatabaseType.PATH + dataset_path = "some_path" + metadata_database_type = PrivateDatabaseType.PATH + metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml" + + self.run_cli_command( + "add_dataset", + [ + "--dataset_name", + dataset, + "--database_type", + database_type, + "--dataset_path", + dataset_path, + "--metadata_database_type", + metadata_database_type, + "--metadata_path", + metadata_path, + ], + ) + + # Verify delete works + self.run_cli_command("del_dataset", ["--dataset", dataset]) + + dataset_found = self.db.datasets.find_one({"dataset_name": "PENGUIN"}) + self.assertEqual(dataset_found, None) + + nb_metadata = self.db.metadata.count_documents({}) + self.assertEqual(nb_metadata, 0) + + # Delete non-existing dataset should trigger error + with self.assertRaises(ValueError): + self.run_cli_command("del_dataset", ["--dataset", dataset])
    + + +
+[docs] + def test_show_dataset_cli(self) -> None: + """Test show dataset via CLI + Does not verify the output for now + """ + dataset = "PENGUIN" + with self.assertRaises(ValueError): + self.run_cli_command("show_dataset", ["--dataset", dataset]) + + self.run_cli_command( + "add_dataset", + [ + "--dataset_name", + dataset, + "--database_type", + PrivateDatabaseType.PATH, + "--dataset_path", + "some_path", + "--metadata_database_type", + PrivateDatabaseType.PATH, + "--metadata_path", + "./tests/test_data/metadata/penguin_metadata.yaml", + ], + ) + self.run_cli_command("show_dataset", ["--dataset", dataset])
    + + +
+[docs] + def test_show_metadata_of_dataset_cli(self) -> None: + """Test show metadata of dataset via CLI + Does not verify the output for now + """ + dataset = "PENGUIN" + with self.assertRaises(ValueError): + self.run_cli_command( + "show_metadata_of_dataset", ["--dataset", dataset] + ) + + self.run_cli_command( + "add_dataset", + [ + "--dataset_name", + dataset, + "--database_type", + PrivateDatabaseType.PATH, + "--dataset_path", + "some_path", + "--metadata_database_type", + PrivateDatabaseType.PATH, + "--metadata_path", + "./tests/test_data/metadata/penguin_metadata.yaml", + ], + ) + self.run_cli_command( + "show_metadata_of_dataset", ["--dataset", dataset] + )
    + + +
+[docs] + def test_get_list_of_datasets_cli(self) -> None: + """Test get list of datasets via CLI + Does not verify the output for now + """ + self.run_cli_command("get_list_of_datasets", []) + self.run_cli_command( + "add_datasets_via_yaml", + ["--yaml_file", "./tests/test_data/test_datasets.yaml"], + ) + self.run_cli_command("get_list_of_datasets", [])
    + + +
    +[docs] + def test_drop_collection_cli(self) -> None: + """Test drop collection from db via cli""" + # Setup: add one dataset + dataset = "PENGUIN" + database_type = PrivateDatabaseType.PATH + dataset_path = "some_path" + metadata_database_type = PrivateDatabaseType.PATH + metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml" + + self.run_cli_command( + "add_dataset", + [ + "--dataset_name", + dataset, + "--database_type", + database_type, + "--dataset_path", + dataset_path, + "--metadata_database_type", + metadata_database_type, + "--metadata_path", + metadata_path, + ], + ) + + # Test + collection = "datasets" + self.run_cli_command("drop_collection", ["-c", collection]) + + nb_datasets = self.db.datasets.count_documents({}) + self.assertEqual(nb_datasets, 0)
    + + +
    +[docs] + def test_show_collection_cli(self) -> None: + """Test show collection from db via CLI""" + self.run_cli_command("show_collection", ["-c", "datasets"]) + self.run_cli_command( + "add_datasets_via_yaml", + ["--yaml_file", "./tests/test_data/test_datasets.yaml"], + ) + self.run_cli_command("show_collection", ["-c", "datasets"])
\ No newline at end of file
diff --git a/html/develop/en/_modules/index.html b/html/develop/en/_modules/index.html index 7e0bcb26..1c4bff86 100644 --- a/html/develop/en/_modules/index.html +++ b/html/develop/en/_modules/index.html @@ -90,21 +90,15 @@

    All modules for which code is available

    -
  • Dataset (class in lomas_server.utils.collections_models) -
  • -
  • dataset_cache (lomas_server.dataset_store.lru_dataset_store.LRUDatasetStore attribute)
  • dataset_must_exist() (in module lomas_server.admin_database.admin_database)
  • @@ -274,8 +266,6 @@

    D

  • (lomas_server.utils.config.Config attribute)
  • -
  • dataset_store_factory() (in module lomas_server.dataset_store.utils) -
  • DATASET_STORE_TYPE (lomas_server.constants.ConfigKeys attribute)
  • DatasetOfPathDB (class in lomas_server.utils.collections_models) @@ -309,11 +299,11 @@

    D

  • DB_TYPE_MONGODB (lomas_server.constants.ConfigKeys attribute)
  • DBConfig (class in lomas_server.utils.config) -
  • -
  • del_dataset() (in module lomas_server.mongodb_admin)
  • -
  • get_dataset_metadata() (lomas_client.client.Client method) +
  • get_dataset_metadata() (lomas_server.admin_database.admin_database.AdminDatabase method)
  • -
  • get_dummy_dataset() (lomas_client.client.Client method) -
  • get_dummy_dataset_for_query() (in module lomas_server.dp_queries.dummy_dataset)
  • get_epsilon_or_delta() (lomas_server.admin_database.admin_database.AdminDatabase method) @@ -518,20 +484,16 @@

    G

  • (lomas_server.admin_database.yaml_database.AdminYamlDatabase method)
  • -
  • get_initial_budget() (lomas_client.client.Client method) - -
  • get_list_of_datasets() (in module lomas_server.mongodb_admin)
  • get_list_of_datasets_from_user() (in module lomas_server.mongodb_admin) -
  • -
  • get_list_of_users() (in module lomas_server.mongodb_admin)
  • -
  • get_previous_queries() (lomas_client.client.Client method) +
  • get_querier() (lomas_server.dataset_store.dataset_store.DatasetStore method)
  • -
  • get_querier() (lomas_server.dataset_store.basic_dataset_store.BasicDatasetStore method) - -
  • -
  • get_remaining_budget() (lomas_client.client.Client method) - -
  • -
  • get_total_spent_budget() (lomas_client.client.Client method) - -
  • get_user_previous_queries() (lomas_server.admin_database.admin_database.AdminDatabase method)