From 08fc13c0bcb78cd3862f18174f1284a253fe7101 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Sat, 27 Mar 2021 10:54:42 +0000 Subject: [PATCH] Simulation scenarios, TLO CLI, interaction with Azure Batch (#249) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mosè Giordano Co-authored-by: Tim Hallett <39991060+tbhallett@users.noreply.github.com> --- .gitignore | 4 + MANIFEST.in | 2 + deploy/.gitignore | 1 + deploy/Dockerfile | 26 + deploy/deploy.sh | 27 + deploy/id_ed25519.pub | 1 + deploy/known_hosts | 1 + requirements/base.in | 8 + requirements/base.txt | 113 ++- requirements/dev.txt | 115 ++- setup.py | 4 + src/scripts/dev/scenarios/playing_22.py | 63 ++ src/scripts/dev/scenarios/test_scenario_1.py | 63 ++ .../2D_grid/analysing_mockitis_2D_grid.py | 60 ++ .../th_testing/2D_grid/mockitis_2D_grid.py | 75 ++ .../analysing_mockitis_single_sweep.py | 104 +++ .../single_sweep/mockitis_single_sweep.py | 71 ++ src/scripts/profiling/batch_test.py | 93 +++ src/scripts/profiling/scale_run.py | 2 +- src/tlo/analysis/utils.py | 153 ++++ src/tlo/cli.py | 666 ++++++++++++++++++ src/tlo/logging/core.py | 13 +- src/tlo/scenario.py | 334 +++++++++ tlo.example.conf | 11 + 24 files changed, 1993 insertions(+), 17 deletions(-) create mode 100644 deploy/.gitignore create mode 100644 deploy/Dockerfile create mode 100755 deploy/deploy.sh create mode 100644 deploy/id_ed25519.pub create mode 100644 deploy/known_hosts create mode 100644 src/scripts/dev/scenarios/playing_22.py create mode 100644 src/scripts/dev/scenarios/test_scenario_1.py create mode 100644 src/scripts/dev/th_testing/2D_grid/analysing_mockitis_2D_grid.py create mode 100644 src/scripts/dev/th_testing/2D_grid/mockitis_2D_grid.py create mode 100644 src/scripts/dev/th_testing/single_sweep/analysing_mockitis_single_sweep.py create mode 100644 src/scripts/dev/th_testing/single_sweep/mockitis_single_sweep.py create mode 100644 src/scripts/profiling/batch_test.py create mode 100644 src/tlo/cli.py create mode 100644 src/tlo/scenario.py create mode 100644 tlo.example.conf diff --git a/.gitignore b/.gitignore index 13a2464249..616ff42acf 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,7 @@ venv.bak/ # TLO .rst files docs/reference/tlo*.rst +# TLO configuration +tlo.conf + + diff --git a/MANIFEST.in b/MANIFEST.in index 3c7a793d45..3d6db42e77 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,9 +7,11 @@ include README.rst include .bumpversion.cfg include .coveragerc include tox.ini .travis.yml +include tlo.example.conf exclude .editorconfig recursive-exclude .ci * +recursive-exclude deploy * recursive-exclude outputs * recursive-exclude resources * diff --git a/deploy/.gitignore b/deploy/.gitignore new file mode 100644 index 0000000000..7b3596a0d8 --- /dev/null +++ b/deploy/.gitignore @@ -0,0 +1 @@ +/id_ed25519 diff --git a/deploy/Dockerfile b/deploy/Dockerfile new file mode 100644 index 0000000000..00bd10b483 --- /dev/null +++ b/deploy/Dockerfile @@ -0,0 +1,26 @@ +FROM python:3.8-slim-buster + +# Setup SSH +RUN mkdir /root/.ssh +COPY id_ed25519 /root/.ssh/id_ed25519 +COPY known_hosts /root/.ssh/known_hosts +RUN chmod -R 0600 /root/.ssh + +# Install Git, Git LFS. 
How to silence apt-get commands: +# https://peteris.rocks/blog/quiet-and-unattended-installation-with-apt-get/ +RUN apt-get update -qq +RUN DEBIAN_FRONTEND=noninteractive apt-get install -qq -y git git-lfs < /dev/null > /dev/null +# Cleanup apt cache +RUN rm -rf /var/lib/apt/lists/* +# Configure git lfs +RUN git lfs install + +# Clone the TLOModel repository and move in it +ARG BRANCH_NAME="master" +RUN git clone --branch "${BRANCH_NAME}" git@github.com:UCL/TLOmodel.git /TLOmodel +WORKDIR /TLOmodel +RUN git gc --aggressive + +# Install dependencies +RUN pip3 install -r requirements/dev.txt +RUN pip3 install -e . diff --git a/deploy/deploy.sh b/deploy/deploy.sh new file mode 100755 index 0000000000..5ec62e1938 --- /dev/null +++ b/deploy/deploy.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +set -e + +REGISTRY_NAME="tlomodel" +REGISTRY_URL="${REGISTRY_NAME}.azurecr.io" +IMAGE_NAME="tlo" +IMAGE_TAG="1.0" +IMAGE_FULL_NAME="${IMAGE_NAME}:${IMAGE_TAG}" + +# Documentation at +# https://docs.microsoft.com/en-us/azure/container-registry/container-registry-get-started-docker-cli. + +# Login to Azure Container Registry +echo -n "Logging into ${REGISTRY_NAME}..." +az acr login --name "${REGISTRY_NAME}" +echo "done" +# Build the image +echo "Building docker image ${IMAGE_FULL_NAME}..." +docker build --tag "${IMAGE_FULL_NAME}" . +# Tag the image +echo -n "Tagging ${REGISTRY_URL}/${IMAGE_FULL_NAME}..." +docker tag "${IMAGE_FULL_NAME}" "${REGISTRY_URL}/${IMAGE_FULL_NAME}" +echo "done" +# Push the image +echo "Pushing ${REGISTRY_URL}/${IMAGE_FULL_NAME}..." +docker push "${REGISTRY_URL}/${IMAGE_FULL_NAME}" diff --git a/deploy/id_ed25519.pub b/deploy/id_ed25519.pub new file mode 100644 index 0000000000..ea4dd49918 --- /dev/null +++ b/deploy/id_ed25519.pub @@ -0,0 +1 @@ +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIC+bU4n6EHIFRqTI1617bR9A54WLjawxGow+X7Rvb1M6 m.giordano@ucl.ac.uk diff --git a/deploy/known_hosts b/deploy/known_hosts new file mode 100644 index 0000000000..2a935a9a2a --- /dev/null +++ b/deploy/known_hosts @@ -0,0 +1 @@ +|1|/L/ftN7pdpk7XqhXWGqqXFDK9yQ=|0IcBIB1SvDcSoXcKhpsmvWRkHDw= ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ== diff --git a/requirements/base.in b/requirements/base.in index e0bce0751f..2d82d7257f 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,6 +1,14 @@ # Base requirements file - common packages we use directly +GitPython +click numpy pandas scipy openpyxl + +# To submit jobs to Azure Batch +azure-batch +azure-identity +azure-keyvault +azure-storage-file-share diff --git a/requirements/base.txt b/requirements/base.txt index 6184f85526..34f15622aa 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,24 +4,125 @@ # # pip-compile --output-file=base.txt base.in # +adal==1.2.6 + # via msrestazure +azure-batch==10.0.0 + # via -r base.in +azure-common==1.1.26 + # via + # azure-batch + # azure-keyvault-certificates + # azure-keyvault-keys + # azure-keyvault-secrets +azure-core==1.11.0 + # via + # azure-identity + # azure-keyvault-certificates + # azure-keyvault-keys + # azure-keyvault-secrets + # azure-storage-file-share +azure-identity==1.5.0 + # via -r base.in +azure-keyvault-certificates==4.2.1 + # via azure-keyvault +azure-keyvault-keys==4.3.1 + 
# via azure-keyvault +azure-keyvault-secrets==4.2.0 + # via azure-keyvault +azure-keyvault==4.1.0 + # via -r base.in +azure-storage-file-share==12.4.1 + # via -r base.in +certifi==2020.12.5 + # via + # msrest + # requests +cffi==1.14.5 + # via cryptography +chardet==4.0.0 + # via requests +click==7.1.2 + # via -r base.in +cryptography==3.4.6 + # via + # adal + # azure-identity + # azure-keyvault-keys + # azure-storage-file-share + # msal + # pyjwt et-xmlfile==1.0.1 # via openpyxl +gitdb==4.0.5 + # via gitpython +gitpython==3.1.13 + # via -r base.in +idna==2.10 + # via requests +isodate==0.6.0 + # via msrest jdcal==1.4.1 # via openpyxl -numpy==1.19.5 +msal-extensions==0.3.0 + # via azure-identity +msal==1.9.0 + # via + # azure-identity + # msal-extensions +msrest==0.6.21 + # via + # azure-batch + # azure-keyvault-certificates + # azure-keyvault-keys + # azure-keyvault-secrets + # azure-storage-file-share + # msrestazure +msrestazure==0.6.4 + # via azure-batch +numpy==1.20.1 # via # -r base.in # pandas # scipy +oauthlib==3.1.0 + # via requests-oauthlib openpyxl==3.0.6 # via -r base.in -pandas==1.2.1 +pandas==1.2.2 # via -r base.in +portalocker==1.7.1 + # via msal-extensions +pycparser==2.20 + # via cffi +pyjwt[crypto]==2.0.1 + # via + # adal + # msal python-dateutil==2.8.1 + # via + # adal + # pandas +pytz==2021.1 # via pandas -pytz==2020.5 - # via pandas -scipy==1.6.0 +requests-oauthlib==1.3.0 + # via msrest +requests==2.25.1 + # via + # adal + # azure-core + # msal + # msrest + # requests-oauthlib +scipy==1.6.1 # via -r base.in six==1.15.0 - # via python-dateutil + # via + # azure-core + # azure-identity + # isodate + # msrestazure + # python-dateutil +smmap==3.0.5 + # via gitdb +urllib3==1.26.3 + # via requests diff --git a/requirements/dev.txt b/requirements/dev.txt index 648bf4347a..b74f6b9650 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,12 +4,59 @@ # # pip-compile --output-file=dev.txt dev.in # +adal==1.2.6 + # via msrestazure appdirs==1.4.4 # via virtualenv attrs==20.3.0 # via pytest +azure-batch==10.0.0 + # via -r base.in +azure-common==1.1.26 + # via + # azure-batch + # azure-keyvault-certificates + # azure-keyvault-keys + # azure-keyvault-secrets +azure-core==1.11.0 + # via + # azure-identity + # azure-keyvault-certificates + # azure-keyvault-keys + # azure-keyvault-secrets + # azure-storage-file-share +azure-identity==1.5.0 + # via -r base.in +azure-keyvault-certificates==4.2.1 + # via azure-keyvault +azure-keyvault-keys==4.3.1 + # via azure-keyvault +azure-keyvault-secrets==4.2.0 + # via azure-keyvault +azure-keyvault==4.1.0 + # via -r base.in +azure-storage-file-share==12.4.1 + # via -r base.in +certifi==2020.12.5 + # via + # msrest + # requests +cffi==1.14.5 + # via cryptography +chardet==4.0.0 + # via requests click==7.1.2 - # via pip-tools + # via + # -r base.in + # pip-tools +cryptography==3.4.6 + # via + # adal + # azure-identity + # azure-keyvault-keys + # azure-storage-file-share + # msal + # pyjwt cycler==0.10.0 # via matplotlib distlib==0.3.1 @@ -20,27 +67,53 @@ filelock==3.0.12 # via # tox # virtualenv +gitdb==4.0.5 + # via gitpython +gitpython==3.1.13 + # via -r base.in +idna==2.10 + # via requests iniconfig==1.1.1 # via pytest +isodate==0.6.0 + # via msrest jdcal==1.4.1 # via openpyxl kiwisolver==1.3.1 # via matplotlib matplotlib==3.3.4 # via -r dev.in -numpy==1.19.5 +msal-extensions==0.3.0 + # via azure-identity +msal==1.9.0 + # via + # azure-identity + # msal-extensions +msrest==0.6.21 + # via + # azure-batch + # azure-keyvault-certificates + 
# azure-keyvault-keys + # azure-keyvault-secrets + # azure-storage-file-share + # msrestazure +msrestazure==0.6.4 + # via azure-batch +numpy==1.20.1 # via # -r base.in # matplotlib # pandas # scipy +oauthlib==3.1.0 + # via requests-oauthlib openpyxl==3.0.6 # via -r base.in -packaging==20.8 +packaging==20.9 # via # pytest # tox -pandas==1.2.1 +pandas==1.2.2 # via -r base.in pillow==8.1.0 # via matplotlib @@ -50,10 +123,18 @@ pluggy==0.13.1 # via # pytest # tox +portalocker==1.7.1 + # via msal-extensions py==1.10.0 # via # pytest # tox +pycparser==2.20 + # via cffi +pyjwt[crypto]==2.0.1 + # via + # adal + # msal pyparsing==2.4.7 # via # matplotlib @@ -64,25 +145,43 @@ pytest==6.2.2 # via -r dev.in python-dateutil==2.8.1 # via + # adal # matplotlib # pandas -pytz==2020.5 +pytz==2021.1 # via pandas -scipy==1.6.0 +requests-oauthlib==1.3.0 + # via msrest +requests==2.25.1 + # via + # adal + # azure-core + # msal + # msrest + # requests-oauthlib +scipy==1.6.1 # via -r base.in six==1.15.0 # via + # azure-core + # azure-identity # cycler + # isodate + # msrestazure # python-dateutil # tox # virtualenv +smmap==3.0.5 + # via gitdb toml==0.10.2 # via # pytest # tox -tox==3.21.3 +tox==3.22.0 # via -r dev.in -virtualenv==20.4.0 +urllib3==1.26.3 + # via requests +virtualenv==20.4.2 # via # -r dev.in # tox diff --git a/setup.py b/setup.py index 12f1dadb3c..ff348559ba 100644 --- a/setup.py +++ b/setup.py @@ -43,4 +43,8 @@ def read(*names, **kwargs): # eg: 'keyword1', 'keyword2', 'keyword3', ], python_requires='>=3', + entry_points=''' + [console_scripts] + tlo=tlo.cli:cli + ''' ) diff --git a/src/scripts/dev/scenarios/playing_22.py b/src/scripts/dev/scenarios/playing_22.py new file mode 100644 index 0000000000..9bc2a2c220 --- /dev/null +++ b/src/scripts/dev/scenarios/playing_22.py @@ -0,0 +1,63 @@ +import numpy as np + +from tlo import Date, logging +from tlo.methods import ( + contraception, + demography, + enhanced_lifestyle, + healthseekingbehaviour, + healthsystem, + labour, + pregnancy_supervisor, + symptommanager, +) +from tlo.scenario import BaseScenario + + +class Playing22(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 12 + self.start_date = Date(2010, 1, 1) + self.end_date = Date(2011, 1, 1) + self.pop_size = 200 + self.number_of_draws = 2 + self.runs_per_draw = 2 + + def log_configuration(self): + return { + 'filename': 'playing_22', + 'directory': './outputs', + 'custom_levels': { + '*': logging.INFO, + } + } + + def modules(self): + return [ + demography.Demography(resourcefilepath=self.resources), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + healthsystem.HealthSystem(resourcefilepath=self.resources, disable=True, service_availability=['*']), + symptommanager.SymptomManager(resourcefilepath=self.resources), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + contraception.Contraception(resourcefilepath=self.resources), + labour.Labour(resourcefilepath=self.resources), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources), + ] + + def draw_parameters(self, draw_number, rng): + return { + 'Lifestyle': { + 'init_p_urban': rng.randint(10, 20) / 100.0, + 'init_p_high_sugar': 0.52, + }, + 'Labour': { + 'intercept_parity_lr2010': -10 * rng.exponential(0.1), + 'effect_age_parity_lr2010': np.linspace(0.1, 1, num=self.number_of_draws)[draw_number] + }, + } + + +if __name__ == '__main__': + from tlo.cli import scenario_run + scenario_run([__file__]) diff --git 
a/src/scripts/dev/scenarios/test_scenario_1.py b/src/scripts/dev/scenarios/test_scenario_1.py new file mode 100644 index 0000000000..0804725540 --- /dev/null +++ b/src/scripts/dev/scenarios/test_scenario_1.py @@ -0,0 +1,63 @@ +import numpy as np + +from tlo import Date, logging +from tlo.methods import ( + contraception, + demography, + enhanced_lifestyle, + healthseekingbehaviour, + healthsystem, + labour, + pregnancy_supervisor, + symptommanager, +) +from tlo.scenario import BaseScenario + + +class TestScenario(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 12 + self.start_date = Date(2010, 1, 1) + self.end_date = Date(2010, 6, 1) + self.pop_size = 100 + self.number_of_draws = 10 + self.runs_per_draw = 10 + + def log_configuration(self): + return { + 'filename': 'test_scenario', + 'directory': './outputs', + 'custom_levels': { + '*': logging.INFO, + } + } + + def modules(self): + return [ + demography.Demography(resourcefilepath=self.resources), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + healthsystem.HealthSystem(resourcefilepath=self.resources, disable=True, service_availability=['*']), + symptommanager.SymptomManager(resourcefilepath=self.resources), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + contraception.Contraception(resourcefilepath=self.resources), + labour.Labour(resourcefilepath=self.resources), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources), + ] + + def draw_parameters(self, draw_number, rng): + return { + 'Lifestyle': { + 'init_p_urban': rng.randint(10, 20) / 100.0, + 'init_p_high_sugar': 0.52, + }, + 'Labour': { + 'intercept_parity_lr2010': -10 * rng.exponential(0.1), + 'effect_age_parity_lr2010': np.arange(0.1, 1.1, 0.1)[draw_number] + }, + } + + +if __name__ == '__main__': + from tlo.cli import scenario_run + scenario_run([__file__]) diff --git a/src/scripts/dev/th_testing/2D_grid/analysing_mockitis_2D_grid.py b/src/scripts/dev/th_testing/2D_grid/analysing_mockitis_2D_grid.py new file mode 100644 index 0000000000..d23cc587d3 --- /dev/null +++ b/src/scripts/dev/th_testing/2D_grid/analysing_mockitis_2D_grid.py @@ -0,0 +1,60 @@ +"""This file uses the results of the batch file to make some summary statistics. 
+The results of the batch run were put into the 'outputs' results_folder
+"""
+
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+
+from tlo.analysis.utils import (
+    extract_params,
+    extract_results,
+    get_grid,
+    get_scenario_info,
+    get_scenario_outputs,
+    load_pickled_dataframes,
+    summarize,
+)
+
+outputspath = Path('./outputs')
+
+# %% Analyse results of runs when doing a sweep of a single parameter:
+
+# 0) Find results_folder associated with a given batch_file and get most recent
+results_folder = get_scenario_outputs('mockitis_2D_grid.py', outputspath)[-1]
+
+# look at one log (so can decide what to extract)
+log = load_pickled_dataframes(results_folder)
+
+# get basic information about the results
+info = get_scenario_info(results_folder)
+
+# 1) Extract the parameters that have varied over the set of simulations
+params = extract_params(results_folder)
+
+# 2) Extract a series for all runs:
+extracted = extract_results(results_folder,
+                            module="tlo.methods.mockitis",
+                            key="summary",  # <-- the key used for the logging entry
+                            column="PropInf",  # <-- the column in the dataframe
+                            index="date")  # <-- optional index
+
+# 3) Get summary of the results for that log-element (only the mean, and the value at the end of the simulation)
+res = summarize(extracted, only_mean=True).iloc[-1]
+res.name = 'z'
+
+# 4) Create a heatmap:
+
+grid = get_grid(params, res)
+fig, ax = plt.subplots()
+c = ax.pcolormesh(
+    grid['Mockitis:p_cure'],
+    grid['Mockitis:p_infection'],
+    grid['z'],
+    shading='nearest'
+)
+ax.set_title('Heat Map')
+plt.xlabel('Mockitis:p_cure')
+plt.ylabel('Mockitis:p_infection')
+fig.colorbar(c, ax=ax)
+plt.show()
diff --git a/src/scripts/dev/th_testing/2D_grid/mockitis_2D_grid.py b/src/scripts/dev/th_testing/2D_grid/mockitis_2D_grid.py
new file mode 100644
index 0000000000..ecd16907b4
--- /dev/null
+++ b/src/scripts/dev/th_testing/2D_grid/mockitis_2D_grid.py
@@ -0,0 +1,75 @@
+"""
+This file defines a batch run through which the Mockitis module is run across a 2-dimensional grid of parameters.
+
+Run on the batch system using:
+```tlo batch-submit src/scripts/dev/th_testing/2D_grid/mockitis_2D_grid.py```
+
+"""
+
+import numpy as np
+
+from tlo import Date, logging
+from tlo.methods import (
+    contraception,
+    demography,
+    dx_algorithm_child,
+    enhanced_lifestyle,
+    healthseekingbehaviour,
+    healthsystem,
+    labour,
+    mockitis,
+    pregnancy_supervisor,
+    symptommanager,
+)
+from tlo.scenario import BaseScenario
+
+
+class MockitisBatch(BaseScenario):
+    def __init__(self):
+        super().__init__()
+        self.seed = 12
+        self.start_date = Date(2010, 1, 1)
+        self.end_date = Date(2020, 1, 1)
+        self.pop_size = 500
+        self.number_of_draws = 6
+        self.runs_per_draw = 2
+
+    def log_configuration(self):
+        return {
+            'filename': 'mockitis_batch',
+            'directory': './outputs',
+            'custom_levels': {
+                '*': logging.INFO,
+            }
+        }
+
+    def modules(self):
+        return [
+            demography.Demography(resourcefilepath=self.resources),
+            enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources),
+            healthsystem.HealthSystem(resourcefilepath=self.resources, disable=True, service_availability=['*']),
+            symptommanager.SymptomManager(resourcefilepath=self.resources),
+            dx_algorithm_child.DxAlgorithmChild(resourcefilepath=self.resources),
+            healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources),
+            contraception.Contraception(resourcefilepath=self.resources),
+            labour.Labour(resourcefilepath=self.resources),
+            pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources),
+            mockitis.Mockitis(resourcefilepath=self.resources)
+        ]
+
+    def draw_parameters(self, draw_number, rng):
+        grid = self.make_grid(
+            {'p_infection': np.linspace(0, 1.0, 3), 'p_cure': [0.25, 0.5]}
+        )
+
+        return {
+            'Mockitis': {
+                'p_infection': grid['p_infection'][draw_number],
+                'p_cure': grid['p_cure'][draw_number]
+            },
+        }
+
+
+if __name__ == '__main__':
+    from tlo.cli import scenario_run
+    scenario_run([__file__])
diff --git a/src/scripts/dev/th_testing/single_sweep/analysing_mockitis_single_sweep.py b/src/scripts/dev/th_testing/single_sweep/analysing_mockitis_single_sweep.py
new file mode 100644
index 0000000000..78c84b010f
--- /dev/null
+++ b/src/scripts/dev/th_testing/single_sweep/analysing_mockitis_single_sweep.py
@@ -0,0 +1,104 @@
+"""This file uses the results of the batch file to make some summary statistics.
+The results of the batch run were put into the 'outputs' results_folder
+"""
+
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from tlo.analysis.utils import (
+    extract_params,
+    extract_results,
+    get_scenario_info,
+    get_scenario_outputs,
+    load_pickled_dataframes,
+    summarize,
+)
+
+outputspath = Path('./outputs')
+
+# %% Analyse results of runs when doing a sweep of a single parameter:
+
+# 0) Find results_folder associated with a given batch_file and get most recent
+results_folder = get_scenario_outputs('mockitis_single_sweep.py', outputspath)[-1]
+
+# look at one log (so can decide what to extract)
+log = load_pickled_dataframes(results_folder)
+
+# get basic information about the results
+info = get_scenario_info(results_folder)
+
+# 1) Extract the parameters that have varied over the set of simulations
+params = extract_params(results_folder)
+
+# 2) Extract a specific log series for all runs:
+extracted = extract_results(results_folder,
+                            module="tlo.methods.mockitis",
+                            key="summary",
+                            column="PropInf",
+                            index="date")
+
+# 3) Get summary of the results for that log-element
+propinf = summarize(extracted)
+
+# If only interested in the means
+propinf_onlymeans = summarize(extracted, only_mean=True)
+
+# 4) Create some plots:
+
+# name of the parameter that varies
+param_name = 'Mockitis:p_infection'
+
+# i) bar plot summarizing the value at the end of the run
+propinf_end = propinf.iloc[[-1]]
+
+height = propinf_end.loc[:, (slice(None), "mean")].iloc[0].values
+lower_upper = np.array(list(zip(
+    propinf_end.loc[:, (slice(None), "lower")].iloc[0].values,
+    propinf_end.loc[:, (slice(None), "upper")].iloc[0].values
+))).transpose()
+
+yerr = abs(lower_upper - height)
+
+xvals = range(info['number_of_draws'])
+xlabels = [
+    round(params.loc[(params.module_param == param_name)][['value']].loc[draw].value, 3)
+    for draw in range(info['number_of_draws'])
+]
+
+fig, ax = plt.subplots()
+ax.bar(
+    x=xvals,
+    height=propinf_end.loc[:, (slice(None), "mean")].iloc[0].values,
+    yerr=yerr
+)
+ax.set_xticks(xvals)
+ax.set_xticklabels(xlabels)
+plt.xlabel(param_name)
+plt.show()
+
+# ii) plot to show time-series (means)
+for draw in range(info['number_of_draws']):
+    plt.plot(
+        propinf.loc[:, (draw, "mean")].index, propinf.loc[:, (draw, "mean")].values,
+        label=f"{param_name}={round(params.loc[(params.module_param == param_name)][['value']].loc[draw].value, 3)}"
+    )
+plt.xlabel(propinf.index.name)
+plt.legend()
+plt.show()
+
+# iii) banded plot to show variation across runs
+draw = 0
+plt.plot(propinf.loc[:, (draw, "mean")].index, propinf.loc[:, (draw, "mean")].values, 'b')
+plt.fill_between(
+    propinf.loc[:, (draw, "mean")].index,
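+    # "lower"/"upper" are the 2.5th/97.5th percentile bands across runs, as computed by summarize()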
+    propinf.loc[:, (draw, "lower")].values,
+    propinf.loc[:, (draw, "upper")].values,
+    color='b',
+    alpha=0.5,
+    label=f"{param_name}={round(params.loc[(params.module_param == param_name)][['value']].loc[draw].value, 3)}"
+)
+plt.xlabel(propinf.index.name)
+plt.legend()
+plt.show()
diff --git a/src/scripts/dev/th_testing/single_sweep/mockitis_single_sweep.py b/src/scripts/dev/th_testing/single_sweep/mockitis_single_sweep.py
new file mode 100644
index 0000000000..eb88f65418
--- /dev/null
+++ b/src/scripts/dev/th_testing/single_sweep/mockitis_single_sweep.py
@@ -0,0 +1,71 @@
+"""
+This file defines a batch run through which the Mockitis module is run across a sweep of a single parameter.
+
+Run on the batch system using:
+```tlo batch-submit src/scripts/dev/th_testing/single_sweep/mockitis_single_sweep.py```
+
+"""
+
+import numpy as np
+
+from tlo import Date, logging
+from tlo.methods import (
+    contraception,
+    demography,
+    dx_algorithm_child,
+    enhanced_lifestyle,
+    healthseekingbehaviour,
+    healthsystem,
+    labour,
+    mockitis,
+    pregnancy_supervisor,
+    symptommanager,
+)
+from tlo.scenario import BaseScenario
+
+
+class MockitisBatch(BaseScenario):
+    def __init__(self):
+        super().__init__()
+        self.seed = 12
+        self.start_date = Date(2010, 1, 1)
+        self.end_date = Date(2020, 1, 1)
+        self.pop_size = 500
+        self.number_of_draws = 5
+        self.runs_per_draw = 5
+
+    def log_configuration(self):
+        return {
+            'filename': 'mockitis_batch',
+            'directory': './outputs',
+            'custom_levels': {
+                '*': logging.INFO,
+            }
+        }
+
+    def modules(self):
+        return [
+            demography.Demography(resourcefilepath=self.resources),
+            enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources),
+            healthsystem.HealthSystem(resourcefilepath=self.resources, disable=True, service_availability=['*']),
+            symptommanager.SymptomManager(resourcefilepath=self.resources),
+            dx_algorithm_child.DxAlgorithmChild(resourcefilepath=self.resources),
+            healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources),
+            contraception.Contraception(resourcefilepath=self.resources),
+            labour.Labour(resourcefilepath=self.resources),
+            pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources),
+            mockitis.Mockitis(resourcefilepath=self.resources)
+        ]
+
+    def draw_parameters(self, draw_number, rng):
+        return {
+            'Mockitis': {
+                'p_infection': np.linspace(0, 0.1, self.number_of_draws)[draw_number],
+            },
+        }
+
+
+if __name__ == '__main__':
+    from tlo.cli import scenario_run
+
+    scenario_run([__file__])
diff --git a/src/scripts/profiling/batch_test.py b/src/scripts/profiling/batch_test.py
new file mode 100644
index 0000000000..cd1eff6df4
--- /dev/null
+++ b/src/scripts/profiling/batch_test.py
@@ -0,0 +1,93 @@
+"""
+A run of the model at scale using all disease modules currently included in master - including logging.
+* All logging
+* Script includes parsing the logfile
+
+For use in profiling.
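+
+Usage (the single command-line argument is the simulation seed), e.g.:
+
+    python src/scripts/profiling/batch_test.py 123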
+""" +import os +import sys +from pathlib import Path + +import pandas as pd +import shared + +from tlo import Date, Simulation, logging +from tlo.analysis.utils import parse_log_file +from tlo.methods import ( + contraception, + demography, + depression, + diarrhoea, + dx_algorithm_adult, + dx_algorithm_child, + enhanced_lifestyle, + epi, + epilepsy, + healthburden, + healthseekingbehaviour, + healthsystem, + labour, + malaria, + oesophagealcancer, + pregnancy_supervisor, + symptommanager, +) + +seed_arg = sys.argv[1] + +# Key parameters about the simulation: +start_date = Date(2010, 1, 1) +end_date = start_date + pd.DateOffset(years=2) + +popsize = 2500 + +# The resource files +resourcefilepath = Path("./resources") + +log_config = { + "filename": f"batch_test_{seed_arg}", + # Write log to ${AZ_BATCH_TASK_WORKING_DIR} if the variable exists, + # otherwise to current directory + "directory": os.getenv("AZ_BATCH_TASK_WORKING_DIR", ".") + "/outputs", + "custom_levels": {"*": logging.INFO} +} + +sim = Simulation(start_date=start_date, seed=int(seed_arg), log_config=log_config) + +# Register the appropriate modules +sim.register( + # Standard modules: + demography.Demography(resourcefilepath=resourcefilepath), + enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath), + symptommanager.SymptomManager(resourcefilepath=resourcefilepath), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), + healthburden.HealthBurden(resourcefilepath=resourcefilepath), + contraception.Contraception(resourcefilepath=resourcefilepath), + labour.Labour(resourcefilepath=resourcefilepath), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + dx_algorithm_child.DxAlgorithmChild(resourcefilepath=resourcefilepath), + dx_algorithm_adult.DxAlgorithmAdult(resourcefilepath=resourcefilepath), + # + # Disease modules considered complete: + diarrhoea.Diarrhoea(resourcefilepath=resourcefilepath), + malaria.Malaria(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + depression.Depression(resourcefilepath=resourcefilepath), + oesophagealcancer.OesophagealCancer(resourcefilepath=resourcefilepath), + epilepsy.Epilepsy(resourcefilepath=resourcefilepath) +) + +# Run the simulation +sim.make_initial_population(n=popsize) +shared.schedule_profile_log(sim) +sim.simulate(end_date=end_date) +shared.print_checksum(sim) + +# Parse the log-file +log_df = parse_log_file(sim.log_filepath) + +print('TABLES:') +for k, v in log_df.items(): + print(f'{k}: {",".join(v.keys())}') diff --git a/src/scripts/profiling/scale_run.py b/src/scripts/profiling/scale_run.py index 456cad8e42..af0e20bc6c 100644 --- a/src/scripts/profiling/scale_run.py +++ b/src/scripts/profiling/scale_run.py @@ -33,7 +33,7 @@ start_date = Date(2010, 1, 1) end_date = start_date + pd.DateOffset(years=2) -pop_size = 500_000 +pop_size = 100 # 500_000 # The resource files rfp = Path("./resources") diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 26a78a082c..cf5e4e47ef 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -1,7 +1,10 @@ """ General utility functions for TLO analysis """ +import os +import pickle from ast import literal_eval +from pathlib import Path import numpy as np import pandas as pd @@ -234,3 +237,153 @@ def make_calendar_period_type(): """ keys, _ = make_calendar_period_lookup() return pd.CategoricalIndex(categories=keys, ordered=True) + + +def 
get_scenario_outputs(scenario_filename: str, outputs_dir: Path) -> list:
+    """Returns paths of folders associated with a batch_file, in chronological order."""
+    stub = scenario_filename.rstrip('.py')
+    folders = [Path(f) for f in os.scandir(outputs_dir) if f.is_dir() and f.name.startswith(stub)]
+    folders.sort()
+    return folders
+
+
+def get_scenario_info(scenario_output_dir: Path) -> dict:
+    """Utility function to get the number of draws and the number of runs in a batch set.
+
+    TODO: read the JSON file to get further information
+    """
+    info = dict()
+    draw_folders = [f for f in os.scandir(scenario_output_dir) if f.is_dir()]
+
+    info['number_of_draws'] = len(draw_folders)
+
+    run_folders = [f for f in os.scandir(draw_folders[0]) if f.is_dir()]
+    info['runs_per_draw'] = len(run_folders)
+
+    return info
+
+
+def load_pickled_dataframes(results_folder: Path, draw=0, run=0, name=None) -> dict:
+    """Utility function to create a dict containing all the logs from the specified run within a batch set."""
+    folder = results_folder / str(draw) / str(run)
+    pickles = [p for p in os.scandir(folder) if p.name.endswith('.pickle')]
+    if name is not None:
+        pickles = [p for p in pickles if p.name in f"{name}.pickle"]
+
+    output = dict()
+    for p in pickles:
+        name = os.path.splitext(p.name)[0]
+        with open(p.path, "rb") as f:
+            output[name] = pickle.load(f)
+
+    return output
+
+
+def extract_params(results_folder: Path) -> pd.DataFrame:
+    """Utility function to unpack results to produce a dataframe that summarizes the parameters that change across
+    the draws. It produces a dataframe with an index of draw and a column for each parameter that is specified to be
+    varied in the batch.
+    NB. This does the extraction from run 0 in each draw, under the assumption that the over-written parameters are the
+    same in each run."""
+
+    # Get the paths for the draws
+    draws = [f for f in os.scandir(results_folder) if f.is_dir()]
+
+    list_of_param_changes = list()
+
+    for d in draws:
+        p = load_pickled_dataframes(results_folder, d.name, 0, name="tlo.scenario")
+        p = p["tlo.scenario"]["override_parameter"]
+
+        p['module_param'] = p['module'] + ':' + p['name']
+        p.index = [int(d.name)] * len(p.index)
+
+        list_of_param_changes.append(p[['module_param', 'new_value']])
+
+    params = pd.concat(list_of_param_changes)
+    params.index.name = 'draw'
+    params = params.rename(columns={'new_value': 'value'})
+    params = params.sort_index()
+
+    return params
+
+
+def extract_results(results_folder: Path, module: str, key: str, column: str, index: str = None) -> pd.DataFrame:
+    """Utility function to unpack results to produce a dataframe that summarizes one series from the log, with a
+    column multi-index for the draw/run. If an 'index' component of the log_element is provided, the dataframe uses
+    that index (but note that this will only work if the index is the same in each run)."""
+
+    if index is not None:
+        # extract the index from the first log, and use this to ensure that all others are exactly the same
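+        # (the index is read from the log of draw 0 / run 0; the assert below checks that
+        # every other run's index matches it)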
+ filename = f"{module}.pickle" + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw=0, run=0, name=filename)[module][key] + index = df.index + + # get number of draws and numbers of runs + info = get_scenario_info(results_folder) + + results = pd.DataFrame(columns=pd.MultiIndex.from_product( + [range(info['number_of_draws']), range(info['runs_per_draw'])], + names=["draw", "run"] + )) + + for draw in range(info['number_of_draws']): + for run in range(info['runs_per_draw']): + try: + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] + results[draw, run] = df[column] + idx = df[column].index + assert idx.equals(index), "Indexes are not the same between runs" + + except ValueError: + results[draw, run] = np.nan + + # if 'index' is provided, set this to be the index of the results + if index is not None: + results.index = index + + return results + + +def summarize(results: pd.DataFrame, only_mean: bool = False) -> pd.DataFrame: + """Utility function to compute summary statistics that finds mean value and 95% interval across the runs for each + draw.""" + summary = pd.DataFrame( + columns=pd.MultiIndex.from_product( + [ + results.columns.unique(level='draw'), + ["mean", "lower", "upper"] + ], + names=['draw', 'stat']), + index=results.index + ) + + summary.loc[:, (slice(None), "mean")] = results.groupby(axis=1, by='draw').mean().values + summary.loc[:, (slice(None), "lower")] = results.groupby(axis=1, by='draw').quantile(0.025).values + summary.loc[:, (slice(None), "upper")] = results.groupby(axis=1, by='draw').quantile(0.975).values + + if only_mean: + # Remove other metrics and simplify if 'only_mean' is required: + om = summary.loc[:, (slice(None), "mean")] + om.columns = [c[0] for c in om.columns.to_flat_index()] + return om + + return summary + + +def get_grid(params: pd.DataFrame, res: pd.Series): + """Utility function to create the arrays needed to plot a heatmap. + params: + This is the dataframe of parameters with index=draw (made using `extract_params()`). 
+    res:
+        results of interest with index=draw (e.g. a series produced by `summarize()`)
+    """
+
+    res = pd.concat([params.pivot(columns='module_param', values='value'), res], axis=1)
+    piv = res.pivot_table(index=res.columns[0], columns=res.columns[1], values=res.columns[2])
+
+    grid = dict()
+    grid[res.columns[0]], grid[res.columns[1]] = np.meshgrid(piv.index, piv.columns)
+    grid[res.columns[2]] = piv.values
+
+    return grid
diff --git a/src/tlo/cli.py b/src/tlo/cli.py
new file mode 100644
index 0000000000..1993afa06f
--- /dev/null
+++ b/src/tlo/cli.py
@@ -0,0 +1,666 @@
+"""The TLOmodel command-line interface"""
+import configparser
+import datetime
+import json
+import math
+import os
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict
+
+import click
+import dateutil.parser
+from azure import batch
+from azure.batch import batch_auth
+from azure.batch import models as batch_models
+from azure.batch.models import BatchErrorException
+from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError
+from azure.identity import DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
+from azure.storage.fileshare import ShareClient, ShareDirectoryClient, ShareFileClient
+from git import Repo
+
+from tlo.scenario import SampleRunner, ScenarioLoader
+
+JOB_LABEL_PADDING = len("State transition time")
+
+
+@click.group()
+@click.option("--config-file", type=click.Path(exists=True), default="tlo.conf", hidden=True)
+@click.option("--verbose", "-v", is_flag=True, default=False)
+@click.pass_context
+def cli(ctx, config_file, verbose):
+    """tlo - the TLOmodel command line utility.
+
+    * run scenarios locally
+    * submit scenarios to the batch system
+    * query the batch system about jobs and tasks
+    * download output results for a completed job
+    """
+    ctx.ensure_object(dict)
+    ctx.obj["config_file"] = config_file
+    ctx.obj["verbose"] = verbose
+
+
+@cli.command()
+@click.argument("scenario_file", type=click.Path(exists=True))
+@click.option("--draw-only", is_flag=True, help="Only generate draws; do not run the simulation")
+def scenario_run(scenario_file, draw_only):
+    """Run the specified scenario locally.
+
+    SCENARIO_FILE is the path to a file containing a scenario class
+    """
+    scenario = load_scenario(scenario_file)
+    run_json = scenario.save_draws()
+    if draw_only:
+        with open(run_json) as f:
+            print(f.read())
+    else:
+        runner = SampleRunner(run_json)
+        runner.run()
+
+
+@cli.command()
+@click.argument("scenario_file", type=click.Path(exists=True))
+@click.option("--keep-pool-alive", type=bool, default=False, is_flag=True, hidden=True)
+@click.pass_context
+def batch_submit(ctx, scenario_file, keep_pool_alive):
+    """Submit a scenario to the batch system.
+
+    SCENARIO_FILE is the path to a file containing a scenario class.
+
+    Your working branch must have all changes committed and pushed to the remote repository.
+    This is to ensure that the copy of the code used by Azure Batch is identical to your own.
+    """
+    print(">Setting up scenario\r", end="")
+    scenario_file = Path(scenario_file).as_posix()
+
+    current_branch = is_file_clean(scenario_file)
+    if current_branch is False:
+        return
+
+    scenario = load_scenario(scenario_file)
+    repo = Repo(".")
+    commit = next(repo.iter_commits(max_count=1, paths=scenario_file))
+    run_json = scenario.save_draws(commit=commit.hexsha)
+
+    print(">Setting up batch\r", end="")
+
+    config = load_config(ctx.obj['config_file'])
+
+    # ID of the Batch job.
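+    # e.g. "mockitis_2D_grid-2021-03-27T105442Z" (scenario file stem plus a UTC timestamp)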
+ timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") + job_id = Path(scenario_file).stem + "-" + timestamp + + # Path in Azure storage where to store the files for this job + azure_directory = f"{config['DEFAULT']['USERNAME']}/{job_id}" + + batch_client = get_batch_client( + config["BATCH"]["NAME"], + config["BATCH"]["KEY"], + config["BATCH"]["URL"] + ) + + create_file_share( + config["STORAGE"]["CONNECTION_STRING"], + config["STORAGE"]["FILESHARE"] + ) + + # Recursively create all nested directories, + for idx in range(len(os.path.split(azure_directory))): + create_directory(config["STORAGE"]["CONNECTION_STRING"], + config["STORAGE"]["FILESHARE"], + "/".join(os.path.split(azure_directory)[:idx+1]), + ) + + upload_local_file(config["STORAGE"]["CONNECTION_STRING"], + run_json, + config["STORAGE"]["FILESHARE"], + azure_directory + "/" + os.path.basename(run_json), + ) + + # Configuration of the pool: type of machines and number of nodes. + vm_size = config["BATCH"]["POOL_VM_SIZE"] + # TODO: cap the number of nodes in the pool? Take the number of nodes in + # input from the user, but always at least 2? + pool_node_count = max(2, math.ceil(scenario.number_of_draws * scenario.runs_per_draw)) + + # User identity in the Batch tasks + auto_user = batch_models.AutoUserSpecification( + elevation_level=batch_models.ElevationLevel.admin, + scope=batch_models.AutoUserScope.task, + ) + + user_identity = batch_models.UserIdentity( + auto_user=auto_user, + ) + + # URL of the Azure File share + azure_file_url = "https://{}.file.core.windows.net/{}".format( + config["STORAGE"]["NAME"], + config["STORAGE"]["FILESHARE"], + ) + + # Specify a container registry + container_registry = batch_models.ContainerRegistry( + registry_server=config["REGISTRY"]["SERVER"], + user_name=config["REGISTRY"]["NAME"], + password=config["REGISTRY"]["KEY"], + ) + + # Name of the image in the registry + image_name = config["REGISTRY"]["SERVER"] + "/" + config["REGISTRY"]["IMAGE_NAME"] + + # Create container configuration, prefetching Docker images from the container registry + container_conf = batch_models.ContainerConfiguration( + container_image_names=[image_name], + container_registries=[container_registry], + ) + + # Options for running the Docker container + container_run_options = "--rm --workdir /TLOmodel" + + # Directory where the file share will be mounted, relative to + # ${AZ_BATCH_NODE_MOUNTS_DIR}. + file_share_mount_point = "mnt" + + azure_file_share_configuration = batch_models.AzureFileShareConfiguration( + account_name=config["STORAGE"]["NAME"], + azure_file_url=azure_file_url, + account_key=config["STORAGE"]["KEY"], + relative_mount_path=file_share_mount_point, + mount_options="-o rw", + ) + + mount_configuration = batch_models.MountConfiguration( + azure_file_share_configuration=azure_file_share_configuration, + ) + + azure_directory = "${{AZ_BATCH_NODE_MOUNTS_DIR}}/" + \ + f"{file_share_mount_point}/{azure_directory}" + azure_run_json = f"{azure_directory}/{os.path.basename(run_json)}" + working_dir = "${{AZ_BATCH_TASK_WORKING_DIR}}" + task_dir = "${{AZ_BATCH_TASK_DIR}}" + gzip_pattern_match = "{{txt,log}}" + command = f""" + git fetch --all + git checkout -b {current_branch} origin/{current_branch} + git pull + pip install -r requirements/base.txt + tlo --config-file tlo.example.conf batch-run {azure_run_json} {working_dir} {{draw_number}} {{run_number}} + cp {task_dir}/std*.txt {working_dir}/{{draw_number}}/{{run_number}}/. 
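+    # compress the copied stdout/stderr and log files before copying them to the mounted file share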
+ gzip {working_dir}/{{draw_number}}/{{run_number}}/*.{gzip_pattern_match} + cp -r {working_dir}/* {azure_directory}/. + """ + command = f"/bin/bash -c '{command}'" + + try: + # Create the job that will run the tasks. + create_job(batch_client, vm_size, pool_node_count, job_id, + container_conf, [mount_configuration], keep_pool_alive) + + # Add the tasks to the job. + add_tasks(batch_client, user_identity, job_id, image_name, + container_run_options, scenario, command) + + except batch_models.BatchErrorException as err: + print_batch_exception(err) + raise + + print(f"Job ID: {job_id}") + + +@cli.command(hidden=True) +@click.argument("path_to_json", type=click.Path(exists=True)) +@click.argument("work_directory", type=click.Path(exists=True)) +@click.argument("draw", type=int) +@click.argument("sample", type=int) +def batch_run(path_to_json, work_directory, draw, sample): + """Runs the specified draw and sample for the Scenario""" + runner = SampleRunner(path_to_json) + output_directory = Path(work_directory) / f"{draw}/{sample}" + output_directory.mkdir(parents=True, exist_ok=True) + runner.run_sample_by_number(output_directory, draw, sample) + + +@cli.command() +@click.argument("job_id", type=str) +@click.option("show_tasks", "--tasks", is_flag=True, default=False, help="Display task information") +@click.option("--raw", default=False, help="Display raw output (only when retrieving using --id)", + is_flag=True, hidden=True) +@click.pass_context +def batch_job(ctx, job_id, raw, show_tasks): + """Display information about a specific job.""" + print(">Querying batch system\r", end="") + config = load_config(ctx.obj['config_file']) + batch_client = get_batch_client( + config["BATCH"]["NAME"], + config["BATCH"]["KEY"], + config["BATCH"]["URL"] + ) + tasks = None + + try: + job = batch_client.job.get(job_id=job_id) + if show_tasks: + tasks = batch_client.task.list(job_id) + except BatchErrorException as e: + print(e.message.value) + return + + job = job.as_dict() + + if raw: + print(json.dumps(job, sort_keys=True, indent=2)) + print(json.dumps(tasks, sort_keys=True, indent=2)) + return + + print_basic_job_details(job) + + if tasks is not None: + print() + print("Tasks\n-----") + total = 0 + state_counts = defaultdict(int) + for task in tasks: + task = task.as_dict() + dt = dateutil.parser.isoparse(task['state_transition_time']) + dt = dt.strftime("%d %b %Y %H:%M") + total += 1 + state_counts[task['state']] += 1 + running_time = "" + if task["state"] == "completed": + if "execution_info" in task: + start_time = dateutil.parser.isoparse(task["execution_info"]["start_time"]) + end_time = dateutil.parser.isoparse(task["execution_info"]["end_time"]) + running_time = str(end_time - start_time).split(".")[0] + print(f"{task['id']}\t\t{task['state']}\t\t{dt}\t\t{running_time}") + print() + status_line = [] + for k, v in state_counts.items(): + status_line.append(f"{k}: {v}") + status_line.append(f"total: {total}") + print("; ".join(status_line)) + + if job["state"] == "completed": + print("\nTo download output run:\n") + print(f"\ttlo batch-download {job_id}") + + +@cli.command() +@click.option("--find", "-f", type=str, default=None, help="Show jobs where identifier contains supplied string") +@click.option("--completed", "status", flag_value="completed", default=False, multiple=True, + help="Only display completed jobs") +@click.option("--active", "status", flag_value="active", default=False, multiple=True, + help="Only display active jobs") +@click.option("-n", default=5, type=int, help="Maximum 
number of jobs to list (default is 5)") +@click.pass_context +def batch_list(ctx, status, n, find): + """List and find running and completed jobs.""" + print(">Querying batch system\r", end="") + config = load_config(ctx.obj["config_file"]) + batch_client = get_batch_client( + config["BATCH"]["NAME"], + config["BATCH"]["KEY"], + config["BATCH"]["URL"] + ) + + # get list of all batch jobs + jobs = batch_client.job.list( + job_list_options=batch_models.JobListOptions( + expand='stats' + ) + ) + count = 0 + for job in jobs: + jad = job.as_dict() + print_job = False + if (status is None or + ("completed" in status and jad["state"] == "completed") or + ("active" in status and jad["state"] == "active")): + if find is not None: + if find in jad["id"]: + print_job = True + else: + print_job = True + + if print_job: + print_basic_job_details(jad) + if "stats" in jad: + print(f"{'Succeeded tasks'.ljust(JOB_LABEL_PADDING)}: {jad['stats']['num_succeeded_tasks']}") + print(f"{'Failed tasks'.ljust(JOB_LABEL_PADDING)}: {jad['stats']['num_failed_tasks']}") + print() + count += 1 + if count == n: + break + + +def print_basic_job_details(job: dict): + """Display basic job information""" + job_labels = { + "id": "ID", + "creation_time": "Creation time", + "state": "State", + "state_transition_time": "State transition time" + } + for _k, _v in job_labels.items(): + if _v.endswith("time"): + _dt = dateutil.parser.isoparse(job[_k]) + _dt = _dt.strftime("%d %b %Y %H:%M") + print(f"{_v.ljust(JOB_LABEL_PADDING)}: {_dt}") + else: + print(f"{_v.ljust(JOB_LABEL_PADDING)}: {job[_k]}") + + +@cli.command() +@click.argument("job_id", type=str) +@click.option("--username", type=str, hidden=True) +@click.option("--verbose", default=False, is_flag=True, hidden=True) +@click.pass_context +def batch_download(ctx, job_id, username, verbose): + """Download output files for a job.""" + config = load_config(ctx.obj["config_file"]) + + directory_count = 0 + + def walk_fileshare(dir_name): + """Recursively visit directories, create local directories and download files""" + nonlocal directory_count + try: + directories = list(share_client.list_directories_and_files(dir_name)) + except ResourceNotFoundError as e: + print("ERROR:", dir_name, "not found.") + print() + print(e.message) + return + create_dir = Path(".", "outputs", dir_name) + os.makedirs(create_dir, exist_ok=True) + if verbose: + print("Creating directory", str(create_dir)) + print("Downloading", dir_name) + + for item in directories: + if item["is_directory"]: + walk_fileshare(f"{dir_name}/{item['name']}") + print(f"\r{directory_count} directories downloaded", end="") + directory_count += 1 + else: + filepath = f"{dir_name}/{item['name']}" + file_client = share_client.get_file_client(filepath) + dest_file_name = Path(".", "outputs", dir_name, item["name"]) + if verbose: + print("File:", filepath, "\n\t->", dest_file_name) + with open(dest_file_name, "wb") as data: + # Download the file from Azure into a stream + stream = file_client.download_file() + # Write the stream to the local file + data.write(stream.readall()) + + if username is None: + username = config["DEFAULT"]["USERNAME"] + + share_client = ShareClient.from_connection_string(config['STORAGE']['CONNECTION_STRING'], + config['STORAGE']['FILESHARE']) + + # if the job directory exist, exit with error + top_level = f"{username}/{job_id}" + destination = Path(".", "outputs", top_level) + if os.path.exists(destination): + print("ERROR: Local directory already exists. 
Please move or delete.")
+        print("Directory:", destination)
+        return
+
+    print(f"Downloading {top_level}")
+    walk_fileshare(top_level)
+    print("\rDownload complete.          ")
+
+
+def load_config(config_file):
+    """Load configuration for accessing Batch services"""
+    config = configparser.ConfigParser()
+    config.read(config_file)
+    server_config = load_server_config(config["AZURE"]["KV_URI"], config["AZURE"]["TENANT_ID"])
+    merged_config = {**config, **server_config}
+    return merged_config
+
+
+def load_server_config(kv_uri, tenant_id) -> Dict[str, Dict]:
+    """Retrieve the server configuration for running Batch using the user's Azure credentials
+
+    Allows the user to log in using credentials from the Azure CLI or an interactive browser.
+
+    On Windows, login might fail because pywin32 is not installed correctly. Resolve by
+    running (as Administrator) `python Scripts\\pywin32_postinstall.py -install`
+    For more information, see https://github.com/mhammond/pywin32/issues/1431
+    """
+    credential = DefaultAzureCredential(
+        interactive_browser_tenant_id=tenant_id,
+        exclude_cli_credential=False,
+        exclude_interactive_browser_credential=False,
+        exclude_environment_credential=True,
+        exclude_managed_identity_credential=True,
+        exclude_visual_studio_code_credential=True,
+        exclude_shared_token_cache_credential=True
+    )
+
+    client = SecretClient(vault_url=kv_uri, credential=credential)
+    storage_config = json.loads(client.get_secret("storageaccount").value)
+    batch_config = json.loads(client.get_secret("batchaccount").value)
+    registry_config = json.loads(client.get_secret("registryserver").value)
+
+    return {"STORAGE": storage_config, "BATCH": batch_config, "REGISTRY": registry_config}
+
+
+def get_batch_client(name, key, url):
+    """Create a Batch service client"""
+    credentials = batch_auth.SharedKeyCredentials(name, key)
+    batch_client = batch.BatchServiceClient(credentials, batch_url=url)
+    return batch_client
+
+
+def load_scenario(scenario_file):
+    """Load the Scenario class from the specified file"""
+    scenario_path = Path(scenario_file)
+    scenario_class = ScenarioLoader(scenario_path.parent / scenario_path.name).get_scenario()
+    print(f"Found class {scenario_class.__class__.__name__} in {scenario_path}")
+    return scenario_class
+
+
+def is_file_clean(scenario_file):
+    """Checks whether the scenario file and the current branch are clean and unchanged.
+
+    :returns: current branch name if all okay, False otherwise
+    """
+    repo = Repo(".")  # assumes you're running the tlo command from the TLOmodel root directory
+
+    if scenario_file in repo.untracked_files:
+        click.echo(
+            f"ERROR: Untracked file {scenario_file}. Add file to repository, commit and push."
+        )
+        return False
+
+    if repo.is_dirty(path=scenario_file):
+        click.echo(
+            f"ERROR: Uncommitted changes in file {scenario_file}. Rollback or commit+push changes."
+        )
+        return False
+
+    current_branch = repo.head.reference
+    commits_ahead = list(repo.iter_commits(f"origin/{current_branch}..{current_branch}"))
+    commits_behind = list(repo.iter_commits(f"{current_branch}..origin/{current_branch}"))
+    if not len(commits_behind) == len(commits_ahead) == 0:
+        click.echo(f"ERROR: Branch '{current_branch}' isn't in-sync with remote: "
+                   f"{len(commits_ahead)} ahead; {len(commits_behind)} behind. "
+                   "Push and/or pull changes.")
+        return False
+
+    return current_branch
+
+
+def print_batch_exception(batch_exception):
+    """Prints the contents of the specified Batch exception.
+
+    :param batch_exception: the `BatchErrorException` to report
+    """
+    print("-------------------------------------------")
+    print("Exception encountered:")
+    if batch_exception.error and \
+            batch_exception.error.message and \
+            batch_exception.error.message.value:
+        print(batch_exception.error.message.value)
+        if batch_exception.error.values:
+            print()
+            for mesg in batch_exception.error.values:
+                print(f"{mesg.key}:\t{mesg.value}")
+    print("-------------------------------------------")
+
+
+def create_file_share(connection_string, share_name):
+    """Uses a ShareClient object to create a share if it does not exist."""
+    try:
+        # Create a ShareClient from a connection string
+        share_client = ShareClient.from_connection_string(
+            connection_string, share_name)
+
+        print("Creating share:", share_name)
+        share_client.create_share()
+
+    except ResourceExistsError as ex:
+        print("ResourceExistsError:", ex.message.splitlines()[0])
+
+
+def create_directory(connection_string, share_name, dir_name):
+    """Creates a directory in the root of the specified file share by using a
+    ShareDirectoryClient object.
+    """
+    try:
+        # Create a ShareDirectoryClient from a connection string
+        dir_client = ShareDirectoryClient.from_connection_string(
+            connection_string, share_name, dir_name)
+
+        print("Creating directory:", share_name + "/" + dir_name)
+        dir_client.create_directory()
+
+    except ResourceExistsError as ex:
+        print("ResourceExistsError:", ex.message.splitlines()[0])
+
+
+def upload_local_file(connection_string, local_file_path, share_name, dest_file_path):
+    """Uploads the contents of the specified file into the specified directory in
+    the specified Azure file share.
+    """
+    try:
+        source_file = open(local_file_path, "rb")
+        data = source_file.read()
+
+        # Create a ShareFileClient from a connection string
+        file_client = ShareFileClient.from_connection_string(
+            connection_string, share_name, dest_file_path)
+
+        print("Uploading to:", share_name + "/" + dest_file_path)
+        file_client.upload_file(data)
+
+    except ResourceExistsError as ex:
+        print("ResourceExistsError:", ex.message)
+
+    except ResourceNotFoundError as ex:
+        print("ResourceNotFoundError:", ex.message)
+
+
+def create_job(batch_service_client, vm_size, pool_node_count, job_id,
+               container_conf, mount_configuration, keep_pool_alive):
+    """Creates a job with the specified ID, associated with the specified pool.
+
+    :param batch_service_client: A Batch service client.
+    :type batch_service_client: `azure.batch.BatchServiceClient`
+    :param str vm_size: Type of virtual machine to use as pool.
+    :param int pool_node_count: Number of nodes in the pool.
+    :param str job_id: The ID for the job.
+    :param container_conf: Configuration of a container.
+    :type container_conf: `azure.batch.models.ContainerConfiguration`
+    :param mount_configuration: Configuration of the file shares to mount on the nodes.
+    :type mount_configuration: `list[azure.batch.models.MountConfiguration]`
+    :param bool keep_pool_alive: if True, keep the auto-pool alive after the job completes (useful for debugging)
+    """
+    print("Creating job.")
+
+    image_reference = batch_models.ImageReference(
+        publisher="microsoft-azure-batch",
+        offer="ubuntu-server-container",
+        sku="16-04-lts",
+        version="latest",
+    )
+
+    virtual_machine_configuration = batch_models.VirtualMachineConfiguration(
+        image_reference=image_reference,
+        container_configuration=container_conf,
+        node_agent_sku_id="batch.node.ubuntu 16.04",
+    )
+
+    pool = batch_models.PoolSpecification(
+        virtual_machine_configuration=virtual_machine_configuration,
+        vm_size=vm_size,
+        target_dedicated_nodes=pool_node_count,
+        mount_configuration=mount_configuration,
+        task_slots_per_node=1
+    )
+
+    auto_pool_specification = batch_models.AutoPoolSpecification(
+        pool_lifetime_option="job",
+        pool=pool,
+        keep_alive=keep_pool_alive,
+    )
+
+    pool_info = batch_models.PoolInformation(
+        auto_pool_specification=auto_pool_specification,
+    )
+
+    job = batch_models.JobAddParameter(
+        id=job_id,
+        pool_info=pool_info,
+        on_all_tasks_complete="terminateJob",
+    )
+
+    batch_service_client.job.add(job)
+
+
+def add_tasks(batch_service_client, user_identity, job_id,
+              image_name, container_run_options, scenario, command):
+    """Adds the simulation tasks in the collection to the specified job.
+
+    :param batch_service_client: A Batch service client.
+    :type batch_service_client: `azure.batch.BatchServiceClient`
+    :param user_identity: User account to use in the jobs.
+    :type user_identity: `azure.batch.models.UserIdentity`
+    :param str job_id: The ID of the job to which to add the tasks.
+    :param str image_name: Name of the Docker image to mount for the task.
+    :param str container_run_options: Options to pass to Docker to run the image.
+    :param scenario.Scenario scenario: instance of the Scenario being run
+    :param str command: Command to run during the task inside the Docker image.
+    """
+
+    print("Adding {} task(s) to job.".format(scenario.number_of_draws * scenario.runs_per_draw))
+
+    tasks = list()
+
+    task_container_settings = batch_models.TaskContainerSettings(
+        image_name=image_name,
+        container_run_options=container_run_options,
+    )
+
+    for draw_number in range(0, scenario.number_of_draws):
+        for run_number in range(0, scenario.runs_per_draw):
+            cmd = command.format(draw_number=draw_number, run_number=run_number)
+            task = batch_models.TaskAddParameter(
+                id=f"draw_{draw_number}-run_{run_number}",
+                command_line=cmd,
+                container_settings=task_container_settings,
+                user_identity=user_identity,
+            )
+            tasks.append(task)
+
+    batch_service_client.task.add_collection(job_id, tasks)
+
+
+if __name__ == '__main__':
+    cli(obj={})
diff --git a/src/tlo/logging/core.py b/src/tlo/logging/core.py
index 315d1f98f0..bd9ba0d610 100644
--- a/src/tlo/logging/core.py
+++ b/src/tlo/logging/core.py
@@ -19,6 +19,15 @@ def getLogger(name='tlo'):
     return _LOGGERS[name]
 
 
+class _MockSim:
+    # used as a placeholder for any logging that happens before the simulation is set up!
+    class MockDate:
+        @staticmethod
+        def isoformat():
+            return "0000-00-00T00:00:00"
+    date = MockDate()
+
+
 class Logger:
     """A Logger for TLO log messages, with simplified usage.
@@ -41,7 +50,7 @@ def __init__(self, name: str, level=_logging.NOTSET):
         self.keys = dict()
 
         # populated by init_logging(simulation) for the top-level "tlo" logger
-        self.simulation = None
+        self.simulation = _MockSim()
 
         # a logger should only be using old-style or new-style logging, not a mixture
         self.logged_stdlib = False
@@ -78,7 +87,7 @@ def reset_attributes(self):
         # clear all logger settings
         self.handlers.clear()
         self.keys.clear()
-        self.simulation = None
+        self.simulation = _MockSim()
         # boolean attributes used for now, can be removed after transition to structured logging
         self.logged_stdlib = False
         self.logged_structured = False
diff --git a/src/tlo/scenario.py b/src/tlo/scenario.py
new file mode 100644
index 0000000000..d079193a88
--- /dev/null
+++ b/src/tlo/scenario.py
@@ -0,0 +1,334 @@
+"""Creating and running simulation scenarios for TLOmodel.
+
+Scenarios are used to specify, configure and run a single simulation or a set of TLOmodel simulations. A scenario
+is created by subclassing BaseScenario and specifying the scenario options therein. You can override parameters of
+the registered modules in various ways in the scenario. See the BaseScenario class for more information.
+
+The subclass of BaseScenario is then used to create "draws", each of which can be considered a fully-specified
+configuration of the scenario, i.e. a parameter draw.
+
+Each draw is run one or more times - a run is a single execution of the TLOmodel simulation. Each run of a draw
+has its own simulation seed but is otherwise identical, so the collection of runs for a given draw describes the
+random variation in the simulation.
+
+In summary:
+
+* A _scenario_ specifies the configuration of the TLOmodel simulation: the simulation start and end dates, initial
+population size, logging setup and registered modules. Optionally, you can also override parameters of modules.
+* A _draw_ is a realisation of a scenario configuration. A scenario can have one or more draws. Draws are
+uninteresting unless you are overriding parameters: if you do not override any model parameters, you would only
+have one draw.
+* A _run_ is the result of running the simulation using a specific configuration. Each draw would be run one or
+more times. Each run for the same draw has an identical configuration except for the simulation seed.
+"""
+import json
+import pickle
+from pathlib import Path, PurePosixPath
+
+import numpy as np
+import pandas as pd
+
+from tlo import Simulation, logging
+from tlo.analysis.utils import parse_log_file
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+MAX_INT = 2**31 - 1
+
+
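For orientation, a minimal sketch of a scenario script built on this module is shown below. It mirrors the shape of
the example scripts added under src/scripts/dev/scenarios/; the attribute values and the overridden parameter name
are illustrative, not taken from a real scenario.

    from tlo import Date, logging
    from tlo.methods import demography
    from tlo.scenario import BaseScenario


    class MyTestScenario(BaseScenario):
        def __init__(self):
            super().__init__()
            self.seed = 12
            self.start_date = Date(2010, 1, 1)
            self.end_date = Date(2011, 1, 1)
            self.pop_size = 200
            self.number_of_draws = 2
            self.runs_per_draw = 2

        def log_configuration(self):
            return {
                'filename': 'my_test_scenario',
                'directory': './outputs',
                'custom_levels': {'*': logging.INFO},
            }

        def modules(self):
            return [demography.Demography(resourcefilepath=self.resources)]

        def draw_parameters(self, draw_number, rng):
            # illustrative override of a hypothetical scalar parameter
            return {'Demography': {'some_scalar_parameter': [0.01, 0.02][draw_number]}}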
+class BaseScenario:
+    """An abstract base class for creating scenarios.
+
+    A scenario is a configuration of a TLOmodel simulation. Users should create a subclass of this class and
+    implement the following methods:
+
+    * __init__ - to set scenario attributes
+    * log_configuration - to configure filename, directory and logging levels for simulation output
+    * modules - to list disease, intervention and health system modules for the simulation
+    * draw_parameters - to override module parameters for each draw of the scenario
+    """
+    def __init__(self):
+        """Constructor for BaseScenario."""
+        self.seed = None
+        self.rng = None
+        self.resources = Path("./resources")
+        self.number_of_draws = 1
+        self.runs_per_draw = 1
+        self.scenario_path = None
+
+    def log_configuration(self, **kwargs):
+        """Implementations return a dictionary configuring logging. Example:
+
+        return {
+            'filename': 'test_scenario',
+            'directory': './outputs',
+            'custom_levels': {
+                '*': logging.WARNING,
+                'tlo.methods.demography': logging.INFO
+            }
+        }
+        """
+        raise NotImplementedError
+
+    def modules(self):
+        """Implementations return a list of instances of modules to register in the simulation. Example:
+
+        return [
+            demography.Demography(resourcefilepath=self.resources),
+            enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources),
+            healthsystem.HealthSystem(resourcefilepath=self.resources, disable=True, service_availability=['*']),
+            ...
+        ]
+        """
+        raise NotImplementedError
+
+    def draw_parameters(self, draw_number, rng):
+        """Implementations return a dictionary of parameters to override for each draw.
+
+        The overridden parameters must be scalar (i.e. float, integer or string), as the following examples
+        demonstrate. The argument `draw_number` and a random number generator are available, if required.
+
+        * Change a parameter to a fixed value: { 'Labour': { 'average_age_at_pregnancy': 25 } }
+        * Sample a value from a distribution: { 'Lifestyle': { 'init_p_urban': rng.randint(10, 20) / 100.0 } }
+        * Set a value based on the draw number: { 'Labour': { 'average_age_at_pregnancy': [25, 30, 35][draw_number] } }
+
+        Implementing this method in a subclass is optional. If no parameters are to be overridden, return None. If
+        no parameters are overridden, only one draw of the scenario is required.
+
+        A full example for a scenario with 10 draws:
+
+        return {
+            'Lifestyle': {
+                'init_p_urban': rng.randint(10, 20) / 100.0,
+            },
+            'Labour': {
+                'average_age_at_pregnancy': -10 * rng.exponential(0.1),
+                'some_other_parameter': np.arange(0.1, 1.1, 0.1)[draw_number]
+            },
+        }
+        """
+        return None
+
+    def save_draws(self, **kwargs):
+        generator = DrawGenerator(self, self.number_of_draws, self.runs_per_draw)
+        output_path = self.scenario_path.parent / f"{self.scenario_path.stem}_draws.json"
+        # assert not os.path.exists(output_path), f'Cannot save run config to {output_path} - file already exists'
+        config = generator.get_run_config(self.scenario_path)
+        if len(kwargs) > 0:
+            for k, v in kwargs.items():
+                config[k] = v
+        if "commit" in config:
+            github_url = f"https://github.com/UCL/TLOmodel/blob/{config['commit']}/{config['scenario_script_path']}"
+            config["github"] = github_url
+        generator.save_config(config, output_path)
+        return output_path
+
+    def make_grid(self, ranges: dict) -> pd.DataFrame:
+        """Utility function to flatten an n-dimensional grid of parameters for use in scenarios.
+
+        Typically used in draw_parameters to determine the set of parameters for a draw. This function checks that
+        the number of draws of the scenario is equal to the number of coordinates in the grid.
+
+        Parameter 'ranges' is a dictionary of { string key: iterable }, where the iterable can be, for example, an
+        np.array or a list. The function returns a DataFrame where each key is a column and each row represents a
+        single coordinate in the grid.
+
+        Usage (in draw_parameters):
+
+        grid = self.make_grid({'p_one': np.linspace(0, 1.0, 5), 'p_two': np.linspace(3.0, 4.0, 2)})
+        return {
+            'Mockitis': {
+                'p_one': grid['p_one'][draw_number],
+                'p_two': grid['p_two'][draw_number]
+            }
+        }
+        """
+        grid = np.meshgrid(*ranges.values())
+        flattened = [g.ravel() for g in grid]
+        positions = np.stack(flattened, axis=1)
+        grid_lookup = pd.DataFrame(positions, columns=ranges.keys())
+        assert self.number_of_draws == len(grid_lookup), f"{len(grid_lookup)} coordinates in grid, " \
+                                                         f"but number_of_draws is {self.number_of_draws}."
+        return grid_lookup
+
+
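To make the flattening concrete, the sketch below shows the grid produced for the ranges used in the docstring
example; it assumes a scenario with number_of_draws = 10, since the 5 x 2 grid has ten coordinates.

    import numpy as np

    # inside draw_parameters(self, draw_number, rng):
    grid = self.make_grid({'p_one': np.linspace(0, 1.0, 5), 'p_two': np.linspace(3.0, 4.0, 2)})
    # grid is a 10-row DataFrame, one row per coordinate of the grid:
    #    p_one  p_two
    # 0   0.00    3.0
    # 1   0.25    3.0
    # ...
    # 9   1.00    4.0
    return {
        'Mockitis': {
            'p_one': grid['p_one'][draw_number],
            'p_two': grid['p_two'][draw_number],
        }
    }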
+class ScenarioLoader:
+    """A utility class to load a scenario class from a file path."""
+    def __init__(self, scenario_path):
+        scenario_module = ScenarioLoader._load_scenario_script(scenario_path)
+        scenario_class = ScenarioLoader._get_scenario_class(scenario_module)
+        self.scenario = scenario_class()
+        self.scenario.scenario_path = scenario_path
+
+    @staticmethod
+    def _load_scenario_script(path):
+        import importlib.util
+        spec = importlib.util.spec_from_file_location(Path(path).stem, path)
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
+        return module
+
+    @staticmethod
+    def _get_scenario_class(scenario_module):
+        import inspect
+        classes = inspect.getmembers(scenario_module, inspect.isclass)
+        classes = [c for (n, c) in classes if BaseScenario == c.__base__]
+        assert len(classes) == 1, "Exactly one subclass of BaseScenario should be defined in the scenario script"
+        return classes[0]
+
+    def get_scenario(self):
+        return self.scenario
+
+
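A quick sketch of the loader in use; the script path points at one of the example scenarios added in this patch:

    loader = ScenarioLoader('src/scripts/dev/scenarios/test_scenario_1.py')
    scenario = loader.get_scenario()
    print(scenario.number_of_draws, scenario.runs_per_draw)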
+class DrawGenerator:
+    """Creates and saves a JSON representation of draws from a scenario."""
+    def __init__(self, scenario_class, number_of_draws, runs_per_draw):
+        self.scenario = scenario_class
+
+        assert self.scenario.seed is not None, "Must set a seed for the scenario. Add `self.seed = `"
+        self.scenario.rng = np.random.RandomState(seed=self.scenario.seed)
+        self.number_of_draws = number_of_draws
+        self.runs_per_draw = runs_per_draw
+        self.draws = self.setup_draws()
+
+    def setup_draws(self):
+        assert self.scenario.number_of_draws > 0, "Number of draws must be greater than 0"
+        assert self.scenario.runs_per_draw > 0, "Number of runs per draw must be greater than 0"
+        if self.scenario.draw_parameters(1, self.scenario.rng) is None:
+            assert self.scenario.number_of_draws == 1, "Number of draws should equal one if no variable parameters"
+        return [self.get_draw(d) for d in range(0, self.scenario.number_of_draws)]
+
+    def get_draw(self, draw_number):
+        return {
+            "draw_number": draw_number,
+            "draw_seed": self.scenario.rng.randint(MAX_INT),
+            "parameters": self.scenario.draw_parameters(draw_number, self.scenario.rng),
+        }
+
+    def get_run_config(self, scenario_path):
+        return {
+            "scenario_script_path": str(PurePosixPath(scenario_path)),
+            "scenario_seed": self.scenario.seed,
+            "runs_per_draw": self.runs_per_draw,
+            "draws": self.draws,
+        }
+
+    def save_config(self, config, output_path):
+        with open(output_path, "w") as f:
+            f.write(json.dumps(config, indent=2))
+
+
+class SampleRunner:
+    """Reads scenario draws from a JSON configuration and handles running of samples."""
+    def __init__(self, run_configuration_path):
+        with open(run_configuration_path, "r") as f:
+            self.run_config = json.load(f)
+        self.scenario = ScenarioLoader(self.run_config["scenario_script_path"]).get_scenario()
+        logger.info(key="message", data=f"Loaded scenario using {run_configuration_path}")
+        logger.info(key="message", data=f"Found {self.number_of_draws} draws; {self.runs_per_draw} runs/draw")
+
+    @property
+    def number_of_draws(self):
+        return len(self.run_config["draws"])
+
+    @property
+    def runs_per_draw(self):
+        return self.run_config["runs_per_draw"]
+
+    def get_draw(self, draw_number):
+        total = self.number_of_draws
+        assert draw_number < total, f"Cannot get draw {draw_number}; only {total} defined."
+        return self.run_config["draws"][draw_number]
+
+    def get_samples_for_draw(self, draw):
+        for sample_number in range(0, self.run_config["runs_per_draw"]):
+            yield self.get_sample(draw, sample_number)
+
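In practice the runner is driven either for a whole configuration or for one specific sample, via the methods
defined just below; a sketch (the JSON path is hypothetical, produced earlier by BaseScenario.save_draws):

    runner = SampleRunner('outputs/my_test_scenario_draws.json')

    # run every sample of every draw, using the scenario's configured output directory
    runner.run()

    # or run a single sample, e.g. draw 0, sample 2, into a chosen directory
    runner.run_sample_by_number('./outputs', draw_number=0, sample_number=2)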
+    def get_sample(self, draw, sample_number):
+        assert sample_number < self.scenario.runs_per_draw, \
+            f"Cannot get sample {sample_number}; samples/draw={self.scenario.runs_per_draw}"
+        sample = draw.copy()
+        sample["sample_number"] = sample_number
+
+        # Instead of using the random number generator to create a seed for the simulation, we use an integer hash
+        # function to create an integer based on the sum of the draw's seed and the sample number. This means the
+        # seed can be created independently and out-of-order (i.e. instead of sampling a seed for each sample in
+        # order).
+        sample["simulation_seed"] = SampleRunner.low_bias_32(sample["draw_seed"] + sample_number)
+        return sample
+
+    def run_sample_by_number(self, output_directory, draw_number, sample_number):
+        draw = self.get_draw(draw_number)
+        sample = self.get_sample(draw, sample_number)
+        self.run_sample(sample, output_directory)
+
+    def run_sample(self, sample, output_directory=None):
+        log_config = self.scenario.log_configuration()
+        if output_directory is not None:
+            log_config["directory"] = output_directory
+
+        sim = Simulation(
+            start_date=self.scenario.start_date,
+            seed=sample["simulation_seed"],
+            log_config=log_config
+        )
+        sim.register(*self.scenario.modules())
+
+        if sample["parameters"] is not None:
+            self.override_parameters(sim, sample["parameters"])
+
+        sim.make_initial_population(n=self.scenario.pop_size)
+        sim.simulate(end_date=self.scenario.end_date)
+        outputs = parse_log_file(sim.log_filepath)
+        for key, output in outputs.items():
+            if key.startswith("tlo."):
+                with open(Path(log_config["directory"]) / f"{key}.pickle", "wb") as f:
+                    pickle.dump(output, f)
+
+    def run(self):
+        for draw in self.run_config["draws"]:
+            for sample in self.get_samples_for_draw(draw):
+                self.run_sample(sample)
+
+    @staticmethod
+    def override_parameters(sim, overridden_params):
+        for module_name, overrides in overridden_params.items():
+            if module_name in sim.modules:
+                module = sim.modules[module_name]
+                for param_name, param_val in overrides.items():
+                    assert param_name in module.PARAMETERS, f"{module} does not have parameter '{param_name}'"
+                    assert np.isscalar(param_val), f"Parameter value '{param_val}' is not a scalar type (float, int, str)"
+
+                    old_value = module.parameters[param_name]
+                    assert type(old_value) == type(param_val), f"Cannot override parameter '{param_name}' - wrong type"
+
+                    module.parameters[param_name] = param_val
+                    logger.info(
+                        key="override_parameter",
+                        data={
+                            "module": module_name,
+                            "name": param_name,
+                            "old_value": old_value,
+                            "new_value": module.parameters[param_name]
+                        }
+                    )
+
+    @staticmethod
+    def low_bias_32(x):
+        """A simple integer hash function with uniform distribution. The following description is taken from
+        https://github.com/skeeto/hash-prospector
+
+        The integer hash function transforms an integer hash key into an integer hash result. For a hash function,
+        the distribution should be uniform. This implies that when the hash result is used to calculate a hash
+        bucket address, all buckets are equally likely to be picked. In addition, similar hash keys should be
+        hashed to very different hash results. Ideally, a single bit change in the hash key should influence all
+        bits of the hash result.
+
+        :param x: an integer
+        :returns: an integer
+        """
+        x *= 0x7feb352d
+        x ^= x >> 15
+        x *= 0x846ca68b
+        x ^= x >> 16
+        return x % (2 ** 32)
diff --git a/tlo.example.conf b/tlo.example.conf
new file mode 100644
index 0000000000..b5f4dc32cb
--- /dev/null
+++ b/tlo.example.conf
@@ -0,0 +1,11 @@
+[DEFAULT]
+# Email address or other unique identifier
+# This is used, for example, when saving data in the shared file store
+USERNAME = your@email.com
+
+[AZURE]
+# For accessing Azure Batch services. Contact your administrator for values
+KV_URI = https://
+TENANT_ID = 
+
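The file follows Python's configparser INI format, so the keys in [DEFAULT] are visible from every section. A
minimal sketch of reading a filled-in copy (the actual loading logic lives in src/tlo/cli.py and may differ; this
assumes only standard-library behaviour):

    import configparser

    config = configparser.ConfigParser()
    config.read("tlo.conf")  # a copy of tlo.example.conf with real values filled in

    username = config["DEFAULT"]["USERNAME"]
    kv_uri = config["AZURE"]["KV_URI"]
    tenant_id = config["AZURE"]["TENANT_ID"]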