import os
import subprocess
import unittest
from types import SimpleNamespace
from typing import List

import yaml
from pymongo import MongoClient

from admin_database.utils import get_mongodb_url
from constants import PrivateDatabaseType
from tests.constants import ENV_MONGO_INTEGRATION
from utils.config import CONFIG_LOADER, get_config


@unittest.skipIf(
    ENV_MONGO_INTEGRATION not in os.environ
    or os.getenv(ENV_MONGO_INTEGRATION, "0").lower() in ("false", "0", "f"),
    f"""Not a MongoDB integration test: {ENV_MONGO_INTEGRATION}
    environment variable not set to True.""",
)
class TestMongoDBAdmin(unittest.TestCase):  # pylint: disable=R0904
    """
    Tests for the functions in mongodb_admin.py.

    This is an integration test and requires a MongoDB database
    to be started before being executed.

    The test is only executed if the LOMAS_TEST_MONGO_INTEGRATION
    environment variable is set to True.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Connection to database"""
        CONFIG_LOADER.load_config(
            config_path="tests/test_configs/test_config_mongo.yaml",
            secrets_path="tests/test_configs/test_secrets.yaml",
        )
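
        # For orientation, a hypothetical sketch of the admin_database
        # section of the test config (field names are the ones read below;
        # the values are illustrative, not the real test settings):
        #
        #   admin_database:
        #     username: user
        #     password: user_pwd
        #     address: localhost
        #     port: 27017
        #     db_name: defaultdb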

        # Access to MongoDB
        db_args = SimpleNamespace(**vars(get_config().admin_database))
        db_url = get_mongodb_url(db_args)
        cls.db = MongoClient(db_url)[db_args.db_name]

        # CLI args to connect to DB
        admin_config = get_config().admin_database
        cls.db_connection_cli = [
            "--username",
            admin_config.username,
            "--password",
            admin_config.password,
            "--address",
            admin_config.address,
            "--port",
            str(admin_config.port),
            "--db_name",
            admin_config.db_name,
        ]
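
        # Illustrative only: with the hypothetical values sketched above,
        # db_connection_cli would expand to
        #   ["--username", "user", "--password", "user_pwd",
        #    "--address", "localhost", "--port", "27017",
        #    "--db_name", "defaultdb"]
        # run_cli_command() prepends these flags to every CLI invocation.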

    def tearDown(self) -> None:
        """Drop all data from database"""
        self.run_cli_command("drop_collection", ["--collection", "metadata"])
        self.run_cli_command("drop_collection", ["--collection", "datasets"])
        self.run_cli_command("drop_collection", ["--collection", "users"])
        self.run_cli_command(
            "drop_collection", ["--collection", "queries_archives"]
        )

    def run_cli_command(self, command: str, args: List) -> None:
        """Run a MongoDB administration CLI command.

        Args:
            command (str): The subcommand to run.
            args (List): A list of arguments for the subcommand;
                each argument is converted to a string.

        Raises:
            ValueError: If the command returns a non-zero exit status.
        """
        str_args = [str(arg) for arg in args]

        cli_command = (
            ["python", "mongodb_admin_cli.py", command]
            + self.db_connection_cli
            + str_args
        )
        try:
            subprocess.run(
                cli_command, capture_output=True, text=True, check=True
            )
        except subprocess.CalledProcessError as e:
            error_message = (
                f"Command: {cli_command}\n"
                f"Return Code: {e.returncode}\n"
                f"Output: {e.output.strip()}\n"
                f"Error: {e.stderr.strip()}"
            )
            raise ValueError(error_message) from e
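
    # Example of the shell command the helper above assembles for
    # run_cli_command("add_user", ["--user", "Tintin"]), using the
    # hypothetical connection values from the sketch in setUpClass:
    #
    #   python mongodb_admin_cli.py add_user \
    #       --username user --password user_pwd --address localhost \
    #       --port 27017 --db_name defaultdb --user Tintin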

    def test_add_user_cli(self) -> None:
        """Test adding a user via cli"""
        user = "Tintin"

        # Add user
        self.run_cli_command("add_user", ["--user", user])

        expected_user = {
            "user_name": user,
            "may_query": True,
            "datasets_list": [],
        }

        user_found = self.db.users.find_one({"user_name": "Tintin"})
        del user_found["_id"]
        self.assertEqual(user_found, expected_user)

        # Adding existing user raises error
        with self.assertRaises(ValueError):
            self.run_cli_command("add_user", ["-u", user])

        # Not giving required argument
        with self.assertRaises(ValueError):
            self.run_cli_command("add_user", ["--nope", "willfail"])

    def test_add_user_wb_cli(self) -> None:
        """Test adding a user with a dataset via cli"""
        user = "Tintin"
        dataset = "Bijoux de la Castafiore"
        epsilon = 10.0
        delta = 0.02

        self.run_cli_command(
            "add_user_with_budget",
            [
                "--user",
                user,
                "--dataset",
                dataset,
                "--epsilon",
                epsilon,
                "--delta",
                delta,
            ],
        )

        expected_user = {  # pylint: disable=duplicate-code
            "user_name": user,
            "may_query": True,
            "datasets_list": [
                {
                    "dataset_name": dataset,
                    "initial_epsilon": epsilon,
                    "initial_delta": delta,
                    "total_spent_epsilon": 0.0,
                    "total_spent_delta": 0.0,
                }
            ],
        }

        user_found = self.db.users.find_one({"user_name": user})
        del user_found["_id"]
        self.assertEqual(user_found, expected_user)

        # Adding budget to existing user should raise error
        with self.assertRaises(ValueError):
            self.run_cli_command(
                "add_user_with_budget",
                ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta],
            )

    def test_del_user_cli(self) -> None:
        """Test deleting a user via cli"""
        # Setup: add a user
        user = "Tintin"
        self.run_cli_command("add_user", ["--user", user])

        # Deleting user
        self.run_cli_command("del_user", ["--user", user])

        expected_user = None
        user_found = self.db.users.find_one({"user_name": user})
        self.assertEqual(user_found, expected_user)

        # Removing a non-existing user should raise an error
        with self.assertRaises(ValueError):
            self.run_cli_command("del_user", ["--user", user])

    def test_add_dataset_to_user_cli(self) -> None:
        """Test add dataset to a user via cli"""
        user = "Tintin"
        dataset = "Bijoux de la Castafiore"
        epsilon = 10.0
        delta = 0.02

        self.run_cli_command("add_user", ["--user", user])
        self.run_cli_command(
            "add_dataset_to_user",
            ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta],
        )
        expected_user = {
            "user_name": user,
            "may_query": True,
            "datasets_list": [
                {
                    "dataset_name": dataset,
                    "initial_epsilon": epsilon,
                    "initial_delta": delta,
                    "total_spent_epsilon": 0.0,
                    "total_spent_delta": 0.0,
                }
            ],
        }

        user_found = self.db.users.find_one({"user_name": user})
        del user_found["_id"]

        self.assertEqual(user_found, expected_user)

        # Adding dataset to existing user with existing dataset should
        # raise an error
        epsilon = 20.0
        with self.assertRaises(ValueError):
            self.run_cli_command(
                "add_dataset_to_user",
                ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta],
            )

        # Adding dataset to non-existing user should raise an error
        user = "Milou"
        with self.assertRaises(ValueError):
            self.run_cli_command(
                "add_dataset_to_user",
                ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta],
            )

    def test_del_dataset_to_user_cli(self) -> None:
        """Test delete dataset from user via cli"""
        # Setup: add user with dataset
        user = "Tintin"
        dataset = "Bijoux de la Castafiore"
        epsilon = 10.0
        delta = 0.02

        self.run_cli_command(
            "add_user_with_budget",
            ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta],
        )

        # Test dataset deletion
        self.run_cli_command(
            "del_dataset_to_user", ["-u", user, "-d", dataset]
        )
        expected_user = {
            "user_name": user,
            "may_query": True,
            "datasets_list": [],
        }
        user_found = self.db.users.find_one({"user_name": user})
        del user_found["_id"]
        self.assertEqual(user_found, expected_user)

        # Remove dataset from non-existent user should raise error
        user = "Milou"
        with self.assertRaises(ValueError):
            self.run_cli_command(
                "del_dataset_to_user", ["-u", user, "-d", dataset]
            )

        # Remove dataset not present in user should raise error
        user = "Tintin"
        dataset = "Bijoux de la Castafiore"
        with self.assertRaises(Exception):
            self.run_cli_command(
                "del_dataset_to_user", ["-u", user, "-d", dataset]
            )

    def test_set_budget_field_cli(self) -> None:
        """Test setting a budget field via cli"""
        # Setup: add user with budget
        user = "Tintin"
        dataset = "Bijoux de la Castafiore"
        epsilon = 10.0
        delta = 0.02

        self.run_cli_command(
            "add_user_with_budget",
            ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta],
        )

        # Updating budget should work
        field = "initial_epsilon"
        value = 15
        self.run_cli_command(
            "set_budget_field",
            ["-u", user, "-d", dataset, "-f", field, "-v", value],
        )

        expected_user = {
            "user_name": user,
            "may_query": True,
            "datasets_list": [
                {
                    "dataset_name": dataset,
                    "initial_epsilon": value,
                    "initial_delta": delta,
                    "total_spent_epsilon": 0.0,
                    "total_spent_delta": 0.0,
                }
            ],
        }

        user_found = self.db.users.find_one({"user_name": user})
        del user_found["_id"]

        self.assertEqual(user_found, expected_user)

        # Setting budget for non-existing user should fail
        user = "Milou"
        with self.assertRaises(ValueError):
            self.run_cli_command(
                "set_budget_field",
                ["-u", user, "-d", dataset, "-f", field, "-v", value],
            )

        # Setting budget for non-existing dataset should fail
        user = "Tintin"
        dataset = "os de Milou"
        with self.assertRaises(ValueError):
            self.run_cli_command(
                "set_budget_field",
                ["-u", user, "-d", dataset, "-f", field, "-v", value],
            )

    def test_set_may_query_cli(self) -> None:
        """Test set may query via cli"""
        # Setup: add user with budget
        user = "Tintin"
        dataset = "PENGUIN"
        epsilon = 10.0
        delta = 0.02

        self.run_cli_command(
            "add_user_with_budget",
            ["-u", user, "-d", dataset, "-e", epsilon, "-del", delta],
        )

        # Set may query
        self.run_cli_command("set_may_query", ["-u", user, "-v", "False"])

        expected_user = {
            "user_name": user,
            "may_query": False,
            "datasets_list": [
                {
                    "dataset_name": dataset,
                    "initial_epsilon": epsilon,
                    "initial_delta": delta,
                    "total_spent_epsilon": 0.0,
                    "total_spent_delta": 0.0,
                }
            ],
        }

        user_found = self.db.users.find_one({"user_name": user})
        del user_found["_id"]
        self.assertEqual(user_found, expected_user)

        # Raises error when user does not exist
        user = "Milou"
        with self.assertRaises(ValueError):
            self.run_cli_command("set_may_query", ["-u", user, "-v", "False"])

    def test_show_user_cli(self) -> None:
        """Test show user via CLI.

        Does not verify output for now.
        """
        user = "Milou"
        self.run_cli_command("add_user", ["-u", user])
        self.run_cli_command("show_user", ["-u", user])

    def test_add_users_via_yaml_cli(self) -> None:
        """Test creating a user collection from a YAML file via cli"""
        # Adding two users
        path = "./tests/test_data/test_user_collection.yaml"
        self.run_cli_command("add_users_via_yaml", ["-yf", path])

        tintin = {
            "user_name": "Tintin",
            "may_query": True,
            "datasets_list": [
                {
                    "dataset_name": "Bijoux de la Castafiore",
                    "initial_epsilon": 10,
                    "initial_delta": 0.005,
                    "total_spent_epsilon": 0.0,
                    "total_spent_delta": 0.0,
                }
            ],
        }

        user_found = self.db.users.find_one({"user_name": "Tintin"})
        del user_found["_id"]

        self.assertEqual(user_found, tintin)

    def test_show_archives_of_user_cli(self) -> None:
        """Test show archives of user via CLI.

        Does not verify output for now.
        """
        user = "Milou"
        self.run_cli_command("add_user", ["-u", user])
        self.run_cli_command("show_archives_of_user", ["-u", user])

    def test_get_list_of_users_cli(self) -> None:
        """Test get list of users via CLI.

        Does not verify output for now.
        """
        self.run_cli_command("get_list_of_users", [])

        self.run_cli_command("add_user", ["-u", "Milou"])
        self.run_cli_command("add_user", ["-u", "Tintin"])
        self.run_cli_command("get_list_of_users", [])

    def test_get_list_of_datasets_from_user_cli(self) -> None:
        """Test get list of datasets from user via CLI.

        Does not verify output for now.
        """
        user = "Milou"
        self.run_cli_command("add_user", ["-u", user])
        self.run_cli_command("get_list_of_datasets_from_user", ["-u", user])

        self.run_cli_command(
            "add_dataset_to_user",
            ["-u", user, "-d", "os", "-e", 0.1, "-del", 0.001],
        )
        self.run_cli_command("get_list_of_datasets_from_user", ["-u", user])

    def test_add_local_dataset_cli(self) -> None:
        """Test adding a local dataset via cli"""
        dataset = "PENGUIN"
        database_type = PrivateDatabaseType.PATH
        dataset_path = "some_path"
        metadata_database_type = PrivateDatabaseType.PATH
        metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml"

        self.run_cli_command(
            "add_dataset",
            [
                "--dataset_name",
                dataset,
                "--database_type",
                database_type,
                "--dataset_path",
                dataset_path,
                "--metadata_database_type",
                metadata_database_type,
                "--metadata_path",
                metadata_path,
            ],
        )

        expected_dataset = {
            "dataset_name": dataset,
            "database_type": database_type,
            "dataset_path": dataset_path,
            "metadata": {
                "database_type": metadata_database_type,
                "metadata_path": metadata_path,
            },
        }
        with open(
            "./tests/test_data/metadata/penguin_metadata.yaml",
            encoding="utf-8",
        ) as f:
            expected_metadata = yaml.safe_load(f)

        dataset_found = self.db.datasets.find_one({"dataset_name": "PENGUIN"})
        del dataset_found["_id"]
        self.assertEqual(dataset_found, expected_dataset)

        # Metadata documents are keyed by dataset name
        metadata_found = self.db.metadata.find_one(
            {dataset: {"$exists": True}}
        )[dataset]
        self.assertEqual(metadata_found, expected_metadata)

    def test_add_datasets_via_yaml_cli(self) -> None:
        """Test adding datasets from a YAML file via cli"""
        # Load reference data
        with open(
            "./tests/test_data/test_datasets.yaml",
            encoding="utf-8",
        ) as f:
            datasets = yaml.safe_load(f)
            penguin = datasets["datasets"][0]
            iris = datasets["datasets"][1]

        with open(
            "./tests/test_data/metadata/penguin_metadata.yaml",
            encoding="utf-8",
        ) as f:
            penguin_metadata = yaml.safe_load(f)

        def verify_datasets():
            # Check penguin and iris are in db
            penguin_found = self.db.datasets.find_one(
                {"dataset_name": "PENGUIN"}
            )
            del penguin_found["_id"]
            self.assertEqual(penguin_found, penguin)

            metadata_found = self.db.metadata.find_one(
                {"PENGUIN": {"$exists": True}}
            )["PENGUIN"]
            self.assertEqual(metadata_found, penguin_metadata)

            iris_found = self.db.datasets.find_one({"dataset_name": "IRIS"})
            del iris_found["_id"]
            self.assertEqual(iris_found, iris)

            metadata_found = self.db.metadata.find_one(
                {"IRIS": {"$exists": True}}
            )["IRIS"]
            self.assertEqual(metadata_found, penguin_metadata)

        path = "./tests/test_data/test_datasets.yaml"

        self.run_cli_command("add_datasets_via_yaml", ["--yaml_file", path])
        verify_datasets()

    def test_del_dataset_cli(self) -> None:
        """Test dataset deletion via cli"""
        # Setup: add one dataset
        dataset = "PENGUIN"
        database_type = PrivateDatabaseType.PATH
        dataset_path = "some_path"
        metadata_database_type = PrivateDatabaseType.PATH
        metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml"

        self.run_cli_command(
            "add_dataset",
            [
                "--dataset_name",
                dataset,
                "--database_type",
                database_type,
                "--dataset_path",
                dataset_path,
                "--metadata_database_type",
                metadata_database_type,
                "--metadata_path",
                metadata_path,
            ],
        )

        # Verify delete works
        self.run_cli_command("del_dataset", ["--dataset", dataset])

        dataset_found = self.db.datasets.find_one({"dataset_name": "PENGUIN"})
        self.assertEqual(dataset_found, None)

        nb_metadata = self.db.metadata.count_documents({})
        self.assertEqual(nb_metadata, 0)

        # Delete non-existing dataset should trigger error
        with self.assertRaises(ValueError):
            self.run_cli_command("del_dataset", ["--dataset", dataset])

    def test_show_dataset_cli(self) -> None:
        """Test show dataset via CLI.

        Does not verify output for now.
        """
        dataset = "PENGUIN"
        with self.assertRaises(ValueError):
            self.run_cli_command("show_dataset", ["--dataset", dataset])

        self.run_cli_command(
            "add_dataset",
            [
                "--dataset_name",
                dataset,
                "--database_type",
                PrivateDatabaseType.PATH,
                "--dataset_path",
                "some_path",
                "--metadata_database_type",
                PrivateDatabaseType.PATH,
                "--metadata_path",
                "./tests/test_data/metadata/penguin_metadata.yaml",
            ],
        )
        self.run_cli_command("show_dataset", ["--dataset", dataset])

    def test_get_list_of_datasets_cli(self) -> None:
        """Test get list of datasets via CLI.

        Does not verify output for now.
        """
        self.run_cli_command("get_list_of_datasets", [])
        self.run_cli_command(
            "add_datasets_via_yaml",
            ["--yaml_file", "./tests/test_data/test_datasets.yaml"],
        )
        self.run_cli_command("get_list_of_datasets", [])

    def test_drop_collection_cli(self) -> None:
        """Test drop collection from db via cli"""
        # Setup: add one dataset
        dataset = "PENGUIN"
        database_type = PrivateDatabaseType.PATH
        dataset_path = "some_path"
        metadata_database_type = PrivateDatabaseType.PATH
        metadata_path = "./tests/test_data/metadata/penguin_metadata.yaml"

        self.run_cli_command(
            "add_dataset",
            [
                "--dataset_name",
                dataset,
                "--database_type",
                database_type,
                "--dataset_path",
                dataset_path,
                "--metadata_database_type",
                metadata_database_type,
                "--metadata_path",
                metadata_path,
            ],
        )

        # Test
        collection = "datasets"
        self.run_cli_command("drop_collection", ["-c", collection])

        nb_datasets = self.db.datasets.count_documents({})
        self.assertEqual(nb_datasets, 0)

    def test_show_collection_cli(self) -> None:
        """Test show collection from db via CLI"""
        self.run_cli_command("show_collection", ["-c", "datasets"])
        self.run_cli_command(
            "add_datasets_via_yaml",
            ["--yaml_file", "./tests/test_data/test_datasets.yaml"],
        )
        self.run_cli_command("show_collection", ["-c", "datasets"])