From 872946cf103abdc8c5da165439f94418206d0b40 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 4 Apr 2024 13:22:36 -0400 Subject: [PATCH 01/27] Setup Python package blueprint. --- .gitignore | 3 ++ pom.xml | 21 ++++++++- python/LICENSE | 1 + python/README.md | 45 +++++++++++++++++++ python/README.rst | 1 + python/pyproject.toml | 58 +++++++++++++++++++++++++ python/tests/test_top_level_elements.py | 9 ++++ 7 files changed, 136 insertions(+), 2 deletions(-) create mode 120000 python/LICENSE create mode 100644 python/README.md create mode 120000 python/README.rst create mode 100644 python/pyproject.toml create mode 100644 python/tests/test_top_level_elements.py diff --git a/.gitignore b/.gitignore index fcaeaaa8..8888f6a6 100644 --- a/.gitignore +++ b/.gitignore @@ -127,6 +127,9 @@ nb-configuration.xml ### Python template !requirements.txt +# We do not track the generated Protobuf files for now. +python/**/*_pb2.py + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/pom.xml b/pom.xml index a9a94d57..e2b88a65 100644 --- a/pom.xml +++ b/pom.xml @@ -187,16 +187,33 @@ + java-cpp-js compile test-compile - compile-python - test-compile-python compile-cpp test-compile-cpp compile-js + + compile-python + + compile-python + + + python/src + + + + compile-python-test + + test-compile-python + + + python/tests + + diff --git a/python/LICENSE b/python/LICENSE new file mode 120000 index 00000000..ea5b6064 --- /dev/null +++ b/python/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/python/README.md b/python/README.md new file mode 100644 index 00000000..69b4bab4 --- /dev/null +++ b/python/README.md @@ -0,0 +1,45 @@ +# README + +This folder contains the files required for building and deployment of `phenopackets` Python package. 
+ +## How to release + +Generate the Python sources by running Maven up to `test` or more: + +```shell +./mvnw test +``` + +Maven will run the Protobuf compiler and write the generated Python classes into `src` folder. + +We must do some postprocessing to maintain backward compatibility with the previous `phenopackets` versions. +Therefore, we run a script to put all elements of Phenopacket Schema v2.0.2 into the top-level `phenopackets` package. + +TODO(iimpulse): implement + +Now is the time to run tests. Note, the tests can only be run *after* installing the package with `test` dependencies! +Let's install the package and run the tests: + +```shell +# Install the `phenopackets` with test dependencies +python3 -m pip install .[test] + +# Run the tests +pytest +``` + +If the tests pass, we can build and deploy the package to PyPi. +To do so, we will need the `build` and `twine` Python packages in the environment: + +```shell +# Build +python3 -m build + +# Deploy +python3 -m twine upload dist/* + +# Clear the deployed files +rm -rf dist +``` + +That's it! 
\ No newline at end of file diff --git a/python/README.rst b/python/README.rst new file mode 120000 index 00000000..89a01069 --- /dev/null +++ b/python/README.rst @@ -0,0 +1 @@ +../README.rst \ No newline at end of file diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 00000000..26f195c3 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,58 @@ +[build-system] +requires = ["setuptools>=65.6.3", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "phenopackets" +version = "2.0.2.post2" # IMPORTANT: This must be kept in sync with `project.version` in the top-level `pom.xml` +requires-python = ">=3.8" +description = "Phenopacket Schema" +readme = "README.rst" +authors = [ + { name = "Michael Gargano", email = "michael.gargano@jax.org" }, + { name = "Daniel Danis", email = "daniel.gordon.danis@protonmail.com" }, + { name = "Jules Jacobsen", email = "j.jacobsen@qmul.ac.uk" }, + { name = "Chris Mungall", email = "cjmungall@lbl.gov" }, + { name = "Peter Robinson", email = "peter.robinson@bih-charite.de" }, +] +license = { file = "LICENSE" } +classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +keywords = [ + "Global Alliance for Genomics and Health", + "GA4GH Phenopacket Schema", + "GA4GH", +] + +dependencies = [ + # TODO: `protobuf>=3.21.7`? + # Maven Central points out presence a security vulnerability in the older versions. 
+ "protobuf>=3.15.0,<4.0.0", +] + +[project.optional-dependencies] +test = [ + "pytest>=7.0.0, <8.0.0", +] + +[project.urls] +homepage = "https://github.com/phenopackets/phenopacket-schema" +repository = "https://github.com/phenopackets/phenopacket-schema.git" +documentation = "https://github.com/phenopackets/phenopacket-schema" +bugtracker = "https://github.com/phenopackets/phenopacket-schema/issues" + +[tool.setuptools] +package-dir = { "" = "src" } + +[tool.pytest.ini_options] +testpaths = [ + "tests", +] \ No newline at end of file diff --git a/python/tests/test_top_level_elements.py b/python/tests/test_top_level_elements.py new file mode 100644 index 00000000..82a65365 --- /dev/null +++ b/python/tests/test_top_level_elements.py @@ -0,0 +1,9 @@ + + +class TestPhenopacket: + + def test_create(self): + from phenopackets.schema.v2.phenopackets_pb2 import Phenopacket + pp = Phenopacket() + + assert isinstance(pp, Phenopacket) From c9871081c7e951ae6c91486252d1ce8eb5bb33be Mon Sep 17 00:00:00 2001 From: iimpulse Date: Thu, 4 Apr 2024 15:46:35 -0600 Subject: [PATCH 02/27] reworking for backwards compat --- deploy-python.sh | 63 ++++--------------- pom.xml | 3 - python/config/__init__.py | 13 ++++ python/pyproject.toml | 6 +- .../test/python => python/tests}/covid19.json | 0 {src/test/python => python/tests}/mcahs1.json | 0 ..._top_level_elements.py => test_imports.py} | 2 + .../tests/test_phenopackets.py | 8 +-- requirements.txt | 2 - setup.py | 40 ------------ src/test/python/README.md | 27 -------- src/test/python/__init__.py | 0 12 files changed, 33 insertions(+), 131 deletions(-) create mode 100644 python/config/__init__.py rename {src/test/python => python/tests}/covid19.json (100%) rename {src/test/python => python/tests}/mcahs1.json (100%) rename python/tests/{test_top_level_elements.py => test_imports.py} (67%) rename src/test/python/phenopackets_test.py => python/tests/test_phenopackets.py (97%) delete mode 100644 requirements.txt delete mode 100644 setup.py 
delete mode 100644 src/test/python/README.md delete mode 100644 src/test/python/__init__.py diff --git a/deploy-python.sh b/deploy-python.sh index cdb7ca6f..93f1b8a4 100755 --- a/deploy-python.sh +++ b/deploy-python.sh @@ -1,30 +1,8 @@ -# Create Temporary Destination -# Phenopackets folder -TEMP_DIRECTORY=$(mktemp -d) -echo "Building phenopacket distribution files in temporary directory at $TEMP_DIRECTORY" -TEMP_DIRECTORY_PYTHON_MODULE="$TEMP_DIRECTORY/phenopackets" -TEMP_DIRECTORY_TESTS_MODULE="$TEMP_DIRECTORY/tests" -TEMP_DIRECTORY_VIRTUAL_ENV="$TEMP_DIRECTORY/phenopackets-venv" -declare -a pyfiles=("base" "phenopackets" "biosample" "disease" "genome" "individual" "interpretation" "medical_action" "measurement" "meta_data" "pedigree" "phenotypic_feature" "vrsatile") -# Functions -createInitFile(){ - echo "import pkg_resources" >> "$TEMP_DIRECTORY/phenopackets/__init__.py" - echo "__version__ = pkg_resources.get_distribution('phenopackets').version" >> "$TEMP_DIRECTORY/phenopackets/__init__.py" - for i in "${pyfiles[@]}" - do - echo "from .${i}_pb2 import *" >> "$TEMP_DIRECTORY/phenopackets/__init__.py" - done -} - -replaceImports(){ - for i in "${pyfiles[@]}" - do - sed -i 's/from phenopackets.schema.v2.core/from . /g' "$TEMP_DIRECTORY_PYTHON_MODULE/${i}_pb2.py" - sed -i 's/from ga4gh.vrsatile.v1/from . /g' "$TEMP_DIRECTORY_PYTHON_MODULE/${i}_pb2.py" - sed -i 's/from ga4gh.vrs.v1/from . /g' "$TEMP_DIRECTORY_PYTHON_MODULE/${i}_pb2.py" - done -} - +# Build and Deploy Python Package +DIRECTORY=./python +echo "Building phenopacket distribution files in directory at $DIRECTORY" +TEMP_DIRECTORY_VIRTUAL_ENV="phenopackets-venv" +cd $DIRECTORY || { echo "Deployment FAILED. 
Couldn't find directory" ; exit 1; } createVirtualEnvironment(){ echo "Creating Python virtual environment at ${1}" python3 -m venv "${1}" &> /dev/null @@ -39,38 +17,23 @@ createVirtualEnvironment(){ echo "Virtual environment created successfully"; } -# Create python module -mkdir $TEMP_DIRECTORY_PYTHON_MODULE -createInitFile -cp ./target/generated-sources/protobuf/python/phenopackets/schema/v2/phenopackets_pb2.py $TEMP_DIRECTORY_PYTHON_MODULE -cp ./target/generated-sources/protobuf/python/phenopackets/schema/v2/core/* $TEMP_DIRECTORY_PYTHON_MODULE -cp ./target/generated-sources/protobuf/python/ga4gh/vrsatile/v1/vrsatile_pb2.py $TEMP_DIRECTORY_PYTHON_MODULE -cp ./target/generated-sources/protobuf/python/ga4gh/vrs/v1/vrs_pb2.py $TEMP_DIRECTORY_PYTHON_MODULE -replaceImports -# Create tests module -mkdir $TEMP_DIRECTORY_TESTS_MODULE -cp ./src/test/python/* $TEMP_DIRECTORY_TESTS_MODULE -# Copy Packaging files -cp requirements.txt setup.py pom.xml LICENSE README.rst $TEMP_DIRECTORY - # Create Python venv in virtual directory createVirtualEnvironment $TEMP_DIRECTORY_VIRTUAL_ENV -cd $TEMP_DIRECTORY || { echo "Deployment FAILED. Couldn't cd to temp directory" ; exit 1; } # shellcheck disable=SC1090 source "$TEMP_DIRECTORY_VIRTUAL_ENV/bin/activate" -pip install -r "$TEMP_DIRECTORY/requirements.txt" # Dependencies for building/deploying -python3 -m pip install setuptools wheel twine || { echo "Deployment FAILED. Failed to install python dependencies" ; exit 1; } +python3 -m pip install build twine || { echo "Deployment FAILED. Failed to install python dependencies" ; exit 1; } +# Rexport module definition until v3 +cp ./config/__init__.py ./src/phenopackets/ # Test -pip install -e . -python3 setup.py test || { echo "Deployment FAILED. Unittest Failure" ; exit 1; } +python3 -m pip install .[test] +pytest || { echo "Deployment FAILED. Unittest Failure" ; exit 1; } # Build -python3 setup.py sdist bdist_wheel || { echo "Deployment FAILED. 
Building python package" ; exit 1; } - +python3 -m build || { echo "Deployment FAILED. Building python package" ; exit 1; } # Deploy - Remove --repository testpypi flag for production. -if [ $1 = "release-prod" ]; then +if [ "$1" = "release-prod" ]; then python3 -m twine upload dist/* -elif [ $1 = "release-test" ]; then +elif [ "$1" = "release-test" ]; then python3 -m twine upload --repository testpypi dist/* else echo "Python Release was prepared successfully. No release argument provided, use one of [release-prod, release-test] to make the production/test release." diff --git a/pom.xml b/pom.xml index e2b88a65..95d1ca76 100644 --- a/pom.xml +++ b/pom.xml @@ -210,9 +210,6 @@ test-compile-python - - python/tests - diff --git a/python/config/__init__.py b/python/config/__init__.py new file mode 100644 index 00000000..6d243e93 --- /dev/null +++ b/python/config/__init__.py @@ -0,0 +1,13 @@ +from .schema.v2.core.base_pb2 import * +from .schema.v2.core.biosample_pb2 import * +from .schema.v2.core.disease_pb2 import * +from .schema.v2.core.genome_pb2 import * +from .schema.v2.core.individual_pb2 import * +from .schema.v2.core.interpretation_pb2 import * +from .schema.v2.core.medical_action_pb2 import * +from .schema.v2.core.meta_data_pb2 import * +from .schema.v2.core.pedigree_pb2 import * +from .schema.v2.core.phenotypic_feature_pb2 import * +from .schema.v2.phenopackets_pb2 import * +from ga4gh.vrs.v1.vrs_pb2 import * +from ga4gh.vrsatile.v1.vrsatile_pb2 import * diff --git a/python/pyproject.toml b/python/pyproject.toml index 26f195c3..7c4dd2fa 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -33,9 +33,7 @@ keywords = [ ] dependencies = [ - # TODO: `protobuf>=3.21.7`? - # Maven Central points out presence a security vulnerability in the older versions. 
- "protobuf>=3.15.0,<4.0.0", + "protobuf>=3.20.2,<4.0.0", ] [project.optional-dependencies] @@ -55,4 +53,4 @@ package-dir = { "" = "src" } [tool.pytest.ini_options] testpaths = [ "tests", -] \ No newline at end of file +] diff --git a/src/test/python/covid19.json b/python/tests/covid19.json similarity index 100% rename from src/test/python/covid19.json rename to python/tests/covid19.json diff --git a/src/test/python/mcahs1.json b/python/tests/mcahs1.json similarity index 100% rename from src/test/python/mcahs1.json rename to python/tests/mcahs1.json diff --git a/python/tests/test_top_level_elements.py b/python/tests/test_imports.py similarity index 67% rename from python/tests/test_top_level_elements.py rename to python/tests/test_imports.py index 82a65365..1f4a5aa0 100644 --- a/python/tests/test_top_level_elements.py +++ b/python/tests/test_imports.py @@ -4,6 +4,8 @@ class TestPhenopacket: def test_create(self): from phenopackets.schema.v2.phenopackets_pb2 import Phenopacket + from phenopackets import Variation + from ga4gh.vrs.v1.vrs_pb2 import Variation pp = Phenopacket() assert isinstance(pp, Phenopacket) diff --git a/src/test/python/phenopackets_test.py b/python/tests/test_phenopackets.py similarity index 97% rename from src/test/python/phenopackets_test.py rename to python/tests/test_phenopackets.py index fdc08796..a489917f 100644 --- a/src/test/python/phenopackets_test.py +++ b/python/tests/test_phenopackets.py @@ -1,13 +1,11 @@ import os -import unittest from google.protobuf.json_format import Parse, MessageToJson from google.protobuf.timestamp_pb2 import Timestamp - from phenopackets import Individual, Sex, PhenotypicFeature, OntologyClass, Phenopacket - -class PhenopacketsTest(unittest.TestCase): +# This will break when we deprecate the new stuff. 
+class PhenopacketsTest: path = os.path.dirname(os.path.abspath(__file__)) subject = Individual(id="Zaphod", sex="MALE", date_of_birth=Timestamp(seconds=-123456798)) phenotypic_features = [PhenotypicFeature(type=OntologyClass(id="HG2G:00001", label="Hoopy")), @@ -61,4 +59,4 @@ def tearDownClass(cls) -> None: if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index c89f3b9b..00000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -protobuf==3.15.0 -xmltodict==0.12.0 \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 1f0a4638..00000000 --- a/setup.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -import xmltodict - -from setuptools import setup, find_packages - -path = os.path.dirname(os.path.abspath(__file__)) - -with open(os.path.join(path, 'LICENSE')) as f: - LICENSE = f.read() - -with open(os.path.join(path, 'README.rst')) as f: - READ_ME = f.read() - -with open(os.path.join(path, 'requirements.txt')) as f: - REQUIREMENTS = f.read().splitlines() - - -def version(): - with open(os.path.join(path, 'pom.xml')) as f: - pom = xmltodict.parse(f.read()) - return pom['project']['version'] - - -setup( - name='phenopackets', - version=version(), # replace with version method - packages=find_packages(), - install_requires=REQUIREMENTS, - package_data={'': ['tests/*']}, - data_files=[('', ['requirements.txt', 'LICENSE'])], - test_suite="tests", - long_description=READ_ME, - long_description_content_type='text/x-rst', - author='Michael Gargano', - author_email='michael.gargano@jax.com', - url='https://github.com/phenopackets/phenopacket-schema', - description='A python implementation of phenopackets protobuf', - license=LICENSE, - keywords='phenopackets, clinical' - ) diff --git a/src/test/python/README.md b/src/test/python/README.md deleted file mode 100644 index ef95d7a1..00000000 --- a/src/test/python/README.md +++ 
/dev/null @@ -1,27 +0,0 @@ -Phenopackets Python Example -=========================== - -Assuming this file is located in the directory `~/github/phenopacket-schema/src/test/python` - -```bash -$ phenopacket_srcdir="~/github/phenopacket-schema" -$ cd $phenopacket_srcdir -# build the project using maven (requires Java) -$ ./mvnw package -# alternatively read-up on protoc -# now setup a dir for the python virtual environment -$ venv_dir="~/python_envs/phenopacket-schema" -$ mkdir -p $venv_dir -$ cd $venv_dir -# copy the auto-generated python files to the env -$ cp -r $phenopacket_srcdir/target/generated-sources/protobuf/python/* . -# copy the test project files -$ cp $phenopacket_srcdir/src/test/python/* . -# setup the python virtual env at `venv` in the current folder -$ python3 -m venv venv -$ source venv/bin/activate -$ pip install -r $phenopacket_srcdir/requirements.txt -$ python phenopackets_test.py -``` - -> the last command in the script above fails. **TODO**: resolve. \ No newline at end of file diff --git a/src/test/python/__init__.py b/src/test/python/__init__.py deleted file mode 100644 index e69de29b..00000000 From ccea6572a0cb0f62eb4cfa99f2398a5c8cf43a57 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 5 Apr 2024 09:51:54 -0400 Subject: [PATCH 03/27] Add explanation to the __init__ file. 
--- python/config/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/config/__init__.py b/python/config/__init__.py index 6d243e93..8e531465 100644 --- a/python/config/__init__.py +++ b/python/config/__init__.py @@ -1,9 +1,18 @@ +# This __init__ file should be placed into `phenopackets` package +# for backward compatibility with `[2.0.0, 2.0.2.post1]`, +# where we used to import Phenopacket Schema building blocks as +# ``` +# import phenopackets as pp2 +# phenopacket = pp2.Phenopacket() +# ``` + from .schema.v2.core.base_pb2 import * from .schema.v2.core.biosample_pb2 import * from .schema.v2.core.disease_pb2 import * from .schema.v2.core.genome_pb2 import * from .schema.v2.core.individual_pb2 import * from .schema.v2.core.interpretation_pb2 import * +from .schema.v2.core.measurement_pb2 import * from .schema.v2.core.medical_action_pb2 import * from .schema.v2.core.meta_data_pb2 import * from .schema.v2.core.pedigree_pb2 import * From d933390fc02111ca2b3067e85828fb9ddd2b2a9d Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 5 Apr 2024 09:53:27 -0400 Subject: [PATCH 04/27] Finish testing. --- python/tests/conftest.py | 10 +++ python/tests/test_imports.py | 103 ++++++++++++++++++++++++-- python/tests/test_phenopackets.py | 118 ++++++++++++++++-------------- 3 files changed, 170 insertions(+), 61 deletions(-) create mode 100644 python/tests/conftest.py diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 00000000..d27777bd --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,10 @@ +import os + +import pytest + + +@pytest.fixture(scope='session') +def fpath_test_dir() -> str: + # When running `pytest` from the top-level Python folder (`phenopacket-schema/python`) + # this path will evaluate to the `tests` folder. 
+ return os.path.dirname(__file__) diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 1f4a5aa0..9baf1a69 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -1,11 +1,104 @@ +class TestImportsV202: + """ + Test that the imports which we had prior `v2.0.2.post2` work. + """ + def test_singular_import(self): + from phenopackets import Phenopacket -class TestPhenopacket: + pp = Phenopacket(id='retinoblastoma.id') - def test_create(self): + assert isinstance(pp, Phenopacket) + + def test_import_module(self): + import phenopackets as pp2 + + pp = pp2.Phenopacket(id='retinoblastoma.id') + + assert isinstance(pp, pp2.Phenopacket) + + def test_import_everything(self): + import phenopackets + + pp = phenopackets.Phenopacket(id='retinoblastoma.id') + + assert isinstance(pp, phenopackets.Phenopacket) + + def test_import_all_sorts_of_stuff(self): + import phenopackets as pps + + payload = ( + pps.OntologyClass, pps.Individual, pps.PhenotypicFeature, pps.Measurement, pps.Biosample, + pps.Interpretation, pps.Disease, pps.MedicalAction, pps.File, pps.MetaData, pps.Family, pps.Cohort, + ) + + for clz in payload: + x = clz() + assert isinstance(x, clz) + + +class TestImportsPostV202: + """ + Test that the imports which we have at and after `v2.0.2.post2` work. 
+ """ + + def test_singular_import(self): from phenopackets.schema.v2.phenopackets_pb2 import Phenopacket - from phenopackets import Variation - from ga4gh.vrs.v1.vrs_pb2 import Variation - pp = Phenopacket() + + pp = Phenopacket(id='retinoblastoma.id') assert isinstance(pp, Phenopacket) + + def test_import_module(self): + import phenopackets.schema.v2.phenopackets_pb2 as pp2 + + pp = pp2.Phenopacket(id='retinoblastoma.id') + + assert isinstance(pp, pp2.Phenopacket) + + def test_import_everything(self): + import phenopackets + + pp = phenopackets.schema.v2.phenopackets_pb2.Phenopacket(id='retinoblastoma.id') + + assert isinstance(pp, phenopackets.schema.v2.phenopackets_pb2.Phenopacket) + + def test_import_v1(self): + from phenopackets.schema.v1.phenopackets_pb2 import Phenopacket + + p = Phenopacket() + assert isinstance(p, Phenopacket) + + +class TestImportVrsatile: + + def test_singular_import(self): + from ga4gh.vrsatile.v1.vrsatile_pb2 import Extension + + e = Extension() + + assert isinstance(e, Extension) + + def test_import_vrs(self): + import ga4gh.vrsatile.v1.vrsatile_pb2 as vrsatile + + e = vrsatile.Extension() + + assert isinstance(e, vrsatile.Extension) + + +class TestImportVrs: + + def test_singular_import(self): + from ga4gh.vrs.v1.vrs_pb2 import Number + + number = Number() + + assert isinstance(number, Number) + + def test_import_vrs(self): + import ga4gh.vrs.v1.vrs_pb2 as vrs + + n = vrs.Number() + + assert isinstance(n, vrs.Number) diff --git a/python/tests/test_phenopackets.py b/python/tests/test_phenopackets.py index a489917f..684a1368 100644 --- a/python/tests/test_phenopackets.py +++ b/python/tests/test_phenopackets.py @@ -1,62 +1,68 @@ import os +import pytest + from google.protobuf.json_format import Parse, MessageToJson from google.protobuf.timestamp_pb2 import Timestamp from phenopackets import Individual, Sex, PhenotypicFeature, OntologyClass, Phenopacket -# This will break when we deprecate the new stuff. 
-class PhenopacketsTest: - path = os.path.dirname(os.path.abspath(__file__)) - subject = Individual(id="Zaphod", sex="MALE", date_of_birth=Timestamp(seconds=-123456798)) - phenotypic_features = [PhenotypicFeature(type=OntologyClass(id="HG2G:00001", label="Hoopy")), - PhenotypicFeature(type=OntologyClass(id="HG2G:00002", label="Frood")) - ] - phenopacket = Phenopacket(id="PPKT:1", subject=subject, phenotypic_features=phenotypic_features) - test_json_file = "test.json" - def test_phenopacket_round_trip(self): - with open(self.test_json_file, 'w') as jsfile: - jsfile.write(MessageToJson(self.phenopacket)) - with open(self.test_json_file, 'r') as jsfile: - phenopacket = Parse(message=Phenopacket(), text=jsfile.read()) - self.assertEqual(phenopacket.subject.id, "Zaphod") - self.assertEqual(Sex.Name(phenopacket.subject.sex), "MALE") - for phenotypic_feature, expected_feature in zip(phenopacket.phenotypic_features, self.phenotypic_features): - term = phenotypic_feature.type - expected_term = expected_feature.type - self.assertEqual(term.id, expected_term.id) - self.assertEqual(term.label, expected_term.label) - - def test_phenopacket_covid(self): - with(open(os.path.join(self.path,"covid19.json"), 'r')) as covid19: - phenopacket = Parse(message=Phenopacket(), text=covid19.read()) - self.assertEqual("P123542", phenopacket.subject.id) - self.assertEqual(11, len(phenopacket.phenotypic_features)) - self.assertEqual(2, len(phenopacket.measurements)) - self.assertEqual("NCIT:C113237", phenopacket.measurements[0].assay.id) - self.assertEqual("LOINC:26474-7", phenopacket.measurements[1].assay.id) - self.assertEqual("MONDO:0005015", phenopacket.diseases[0].term.id) - self.assertEqual("cardiomyopathy", phenopacket.diseases[1].term.label) - self.assertNotEqual(None, phenopacket.diseases[2].onset.timestamp) - - def test_phenopacket_mcahs1(self): - with(open(os.path.join(self.path,"mcahs1.json"), 'r')) as covid19: - phenopacket = Parse(message=Phenopacket(), text=covid19.read()) - 
self.assertEqual("proband A", phenopacket.subject.id) - self.assertEqual(1, phenopacket.subject.sex) - self.assertEqual(3, phenopacket.interpretations[0].progress_status) - self.assertEqual("Bethlem myopathy 1", phenopacket.interpretations[0].diagnosis.disease.label) - self.assertEqual(5, phenopacket.interpretations[0].diagnosis.genomic_interpretations[0].variant_interpretation.acmg_pathogenicity_classification) - self.assertEqual("GENO:0000135", phenopacket.interpretations[0].diagnosis.genomic_interpretations[0].variant_interpretation.variation_descriptor.allelic_state.id) - - - - @classmethod - def tearDownClass(cls) -> None: - try: - os.remove(cls.test_json_file) - except OSError as e: - print("Error: %s - %s." % (e.filename, e.strerror)) - - -if __name__ == '__main__': - unittest.main() + +# This will break when we deprecate the top-level import. +class TestPhenopackets: + + @pytest.fixture + def phenopacket(self) -> Phenopacket: + subject = Individual(id="Zaphod", sex="MALE", date_of_birth=Timestamp(seconds=-123456798)) + phenotypic_features = ( + PhenotypicFeature(type=OntologyClass(id="HG2G:00001", label="Hoopy")), + PhenotypicFeature(type=OntologyClass(id="HG2G:00002", label="Frood")), + ) + + return Phenopacket(id="PPKT:1", subject=subject, phenotypic_features=phenotypic_features) + + def test_phenopacket_round_trip( + self, + phenopacket: Phenopacket, + ): + output: str = MessageToJson(phenopacket) + + back = Parse(message=Phenopacket(), text=output) + + assert back.subject.id == "Zaphod" + assert Sex.Name(back.subject.sex) == "MALE" + for phenotypic_feature, expected_feature in zip(back.phenotypic_features, phenopacket.phenotypic_features): + term = phenotypic_feature.type + expected_term = expected_feature.type + assert term.id == expected_term.id + assert term.label == expected_term.label + + def test_phenopacket_covid( + self, + fpath_test_dir: str, + ): + with open(os.path.join(fpath_test_dir, "covid19.json")) as fh: + pp = 
Parse(message=Phenopacket(), text=fh.read()) + + assert pp.subject.id == "P123542" + assert len(pp.phenotypic_features) == 11 + assert len(pp.measurements) == 2 + assert pp.measurements[0].assay.id == "NCIT:C113237" + assert pp.measurements[1].assay.id == "LOINC:26474-7" + assert pp.diseases[0].term.id == "MONDO:0005015" + assert pp.diseases[1].term.label == "cardiomyopathy" + + assert pp.diseases[2].onset.timestamp is not None + + def test_phenopacket_mcahs1( + self, + fpath_test_dir: str, + ): + with open(os.path.join(fpath_test_dir, "mcahs1.json")) as fh: + pp = Parse(message=Phenopacket(), text=fh.read()) + + assert pp.subject.id == "proband A" + assert pp.subject.sex == 1 + assert pp.interpretations[0].progress_status == 3 + assert pp.interpretations[0].diagnosis.disease.label == "Bethlem myopathy 1" + assert pp.interpretations[0].diagnosis.genomic_interpretations[0].variant_interpretation.acmg_pathogenicity_classification == 5 + assert pp.interpretations[0].diagnosis.genomic_interpretations[0].variant_interpretation.variation_descriptor.allelic_state.id == "GENO:0000135" From 5ed24419dafdf71f91cf8d06938228086007666e Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 5 Apr 2024 10:26:41 -0400 Subject: [PATCH 05/27] Revert changes to `pom.xml`, do everything in the deployment script. --- deploy-python.sh | 30 ++++++++++++++++++++++-------- pom.xml | 16 +--------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/deploy-python.sh b/deploy-python.sh index 93f1b8a4..6c89531e 100755 --- a/deploy-python.sh +++ b/deploy-python.sh @@ -1,7 +1,20 @@ +#!/usr/bin/env bash + # Build and Deploy Python Package +# We assume the script is ran from the top-level repository folder as ./deploy-python.sh + DIRECTORY=./python echo "Building phenopacket distribution files in directory at $DIRECTORY" -TEMP_DIRECTORY_VIRTUAL_ENV="phenopackets-venv" + +# Ensure we generated the protobuf Python files. 
+./mvnw clean package + +# Copy the generated files into the target folder +cp -r target/generated-sources/protobuf/python/* ${DIRECTORY}/src/ +# Add module file to re-export the v2.0.2 classes to maintain backwards compatibility +# until we remove the classes. +cp ${DIRECTORY}/config/__init__.py ${DIRECTORY}/src/phenopackets/ + cd $DIRECTORY || { echo "Deployment FAILED. Couldn't find directory" ; exit 1; } createVirtualEnvironment(){ echo "Creating Python virtual environment at ${1}" @@ -18,16 +31,17 @@ createVirtualEnvironment(){ } # Create Python venv in virtual directory +TEMP_DIRECTORY_VIRTUAL_ENV="phenopackets-venv" createVirtualEnvironment $TEMP_DIRECTORY_VIRTUAL_ENV # shellcheck disable=SC1090 source "$TEMP_DIRECTORY_VIRTUAL_ENV/bin/activate" -# Dependencies for building/deploying -python3 -m pip install build twine || { echo "Deployment FAILED. Failed to install python dependencies" ; exit 1; } -# Rexport module definition until v3 -cp ./config/__init__.py ./src/phenopackets/ + # Test -python3 -m pip install .[test] +python3 -m pip install ".[test]" pytest || { echo "Deployment FAILED. Unittest Failure" ; exit 1; } + +# Install dependencies for building/deploying +python3 -m pip install build twine || { echo "Deployment FAILED. Failed to install python dependencies" ; exit 1; } # Build python3 -m build || { echo "Deployment FAILED. Building python package" ; exit 1; } # Deploy - Remove --repository testpypi flag for production. @@ -39,5 +53,5 @@ else echo "Python Release was prepared successfully. No release argument provided, use one of [release-prod, release-test] to make the production/test release." 
fi - - +# Clean up +rm -rf build dist src/* diff --git a/pom.xml b/pom.xml index 95d1ca76..c1e1f60d 100644 --- a/pom.xml +++ b/pom.xml @@ -187,28 +187,14 @@ - java-cpp-js compile test-compile compile-cpp test-compile-cpp - compile-js - - - - compile-python - compile-python - - - python/src - - - - compile-python-test - test-compile-python + compile-js From 598c3bb662b9c2ca2a1163a14ce6d93dfd045508 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 5 Apr 2024 10:30:08 -0400 Subject: [PATCH 06/27] Improve clean-up. --- deploy-python.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deploy-python.sh b/deploy-python.sh index 6c89531e..5c5c99dc 100755 --- a/deploy-python.sh +++ b/deploy-python.sh @@ -54,4 +54,5 @@ else fi # Clean up -rm -rf build dist src/* +deactivate +rm -rf build dist src/* ${TEMP_DIRECTORY_VIRTUAL_ENV} From 4ed9b7629b58049457a0b29065e9a3746321a496 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 5 Apr 2024 14:21:20 -0400 Subject: [PATCH 07/27] Finish merging `master` to `python/include-generated-files`. --- setup.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 setup.py diff --git a/setup.py b/setup.py deleted file mode 100644 index e69de29b..00000000 From 8396f1381a1225c52482414577845f565c9b3f71 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 5 Apr 2024 14:22:51 -0400 Subject: [PATCH 08/27] Reorder protobuf plugin goals to the original order. 
--- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 9bf70c6a..0ee8b212 100644 --- a/pom.xml +++ b/pom.xml @@ -190,10 +190,10 @@ compile test-compile - compile-cpp - test-compile-cpp compile-python test-compile-python + compile-cpp + test-compile-cpp compile-js From b131bb863bbb5429c5d6a64d55d564c018b8a1d3 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 5 Apr 2024 14:28:32 -0400 Subject: [PATCH 09/27] Update the python README --- python/README.md | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/python/README.md b/python/README.md index 69b4bab4..f1a0c70b 100644 --- a/python/README.md +++ b/python/README.md @@ -10,18 +10,28 @@ Generate the Python sources by running Maven up to `test` or more: ./mvnw test ``` -Maven will run the Protobuf compiler and write the generated Python classes into `src` folder. +Maven will run the Protobuf compiler and generate the Python classes. -We must do some postprocessing to maintain backward compatibility with the previous `phenopackets` versions. -Therefore, we run a script to put all elements of Phenopacket Schema v2.0.2 into the top-level `phenopackets` package. +We copy the generated classes into the package: -TODO(iimpulse): implement +```shell +cp -r target/generated-sources/protobuf/python/* python/src/ +``` + +We re-export elements of `v2.0.2` schema from the top-level package to maintain the backward compatibility +with the previous `phenopackets` versions. + +```shell +cd python + +cp config/__init__.py src/phenopackets/ +``` Now is the time to run tests. Note, the tests can only be run *after* installing the package with `test` dependencies! Let's install the package and run the tests: ```shell -# Install the `phenopackets` with test dependencies +# Install the package with test dependencies python3 -m pip install .[test] # Run the tests @@ -32,6 +42,9 @@ If the tests pass, we can build and deploy the package to PyPi. 
To do so, we will need the `build` and `twine` Python packages in the environment: ```shell +# Install the build libraries +python3 -m pip install build twine + # Build python3 -m build From 7c03f2dd8d4e68431c34a2608f881c6739f315d2 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 19 Apr 2024 11:59:08 -0400 Subject: [PATCH 10/27] Move the vrs/vrsatile proto files into `phenopackets`. --- python/config/__init__.py | 4 ++-- python/tests/test_imports.py | 8 ++++---- .../phenopackets/schema/v2/core/interpretation.proto | 2 +- src/main/proto/{ga4gh => phenopackets}/vrs/v1/vrs.proto | 0 .../{ga4gh => phenopackets}/vrsatile/v1/vrsatile.proto | 2 +- src/vrs-protobuf | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) rename src/main/proto/{ga4gh => phenopackets}/vrs/v1/vrs.proto (100%) rename src/main/proto/{ga4gh => phenopackets}/vrsatile/v1/vrsatile.proto (98%) diff --git a/python/config/__init__.py b/python/config/__init__.py index 8e531465..adc923b5 100644 --- a/python/config/__init__.py +++ b/python/config/__init__.py @@ -18,5 +18,5 @@ from .schema.v2.core.pedigree_pb2 import * from .schema.v2.core.phenotypic_feature_pb2 import * from .schema.v2.phenopackets_pb2 import * -from ga4gh.vrs.v1.vrs_pb2 import * -from ga4gh.vrsatile.v1.vrsatile_pb2 import * +from .vrs.v1.vrs_pb2 import * +from .vrsatile.v1.vrsatile_pb2 import * diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 9baf1a69..6f5958d6 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -73,14 +73,14 @@ def test_import_v1(self): class TestImportVrsatile: def test_singular_import(self): - from ga4gh.vrsatile.v1.vrsatile_pb2 import Extension + from phenopackets.vrsatile.v1.vrsatile_pb2 import Extension e = Extension() assert isinstance(e, Extension) def test_import_vrs(self): - import ga4gh.vrsatile.v1.vrsatile_pb2 as vrsatile + import phenopackets.vrsatile.v1.vrsatile_pb2 as vrsatile e = vrsatile.Extension() @@ -90,14 +90,14 @@ def 
test_import_vrs(self): class TestImportVrs: def test_singular_import(self): - from ga4gh.vrs.v1.vrs_pb2 import Number + from phenopackets.vrs.v1.vrs_pb2 import Number number = Number() assert isinstance(number, Number) def test_import_vrs(self): - import ga4gh.vrs.v1.vrs_pb2 as vrs + import phenopackets.vrs.v1.vrs_pb2 as vrs n = vrs.Number() diff --git a/src/main/proto/phenopackets/schema/v2/core/interpretation.proto b/src/main/proto/phenopackets/schema/v2/core/interpretation.proto index 697653e7..58861f38 100644 --- a/src/main/proto/phenopackets/schema/v2/core/interpretation.proto +++ b/src/main/proto/phenopackets/schema/v2/core/interpretation.proto @@ -3,7 +3,7 @@ syntax = "proto3"; package org.phenopackets.schema.v2.core; import "phenopackets/schema/v2/core/base.proto"; -import "ga4gh/vrsatile/v1/vrsatile.proto"; +import "phenopackets/vrsatile/v1/vrsatile.proto"; option java_multiple_files = true; option java_package = "org.phenopackets.schema.v2.core"; diff --git a/src/main/proto/ga4gh/vrs/v1/vrs.proto b/src/main/proto/phenopackets/vrs/v1/vrs.proto similarity index 100% rename from src/main/proto/ga4gh/vrs/v1/vrs.proto rename to src/main/proto/phenopackets/vrs/v1/vrs.proto diff --git a/src/main/proto/ga4gh/vrsatile/v1/vrsatile.proto b/src/main/proto/phenopackets/vrsatile/v1/vrsatile.proto similarity index 98% rename from src/main/proto/ga4gh/vrsatile/v1/vrsatile.proto rename to src/main/proto/phenopackets/vrsatile/v1/vrsatile.proto index 5f56e489..c45ceb2d 100644 --- a/src/main/proto/ga4gh/vrsatile/v1/vrsatile.proto +++ b/src/main/proto/phenopackets/vrsatile/v1/vrsatile.proto @@ -2,7 +2,7 @@ syntax = "proto3"; package org.ga4gh.vrsatile.v1; -import "ga4gh/vrs/v1/vrs.proto"; +import "phenopackets/vrs/v1/vrs.proto"; import "phenopackets/schema/v2/core/base.proto"; option java_multiple_files = true; diff --git a/src/vrs-protobuf b/src/vrs-protobuf index d045bb0c..d3e8c390 160000 --- a/src/vrs-protobuf +++ b/src/vrs-protobuf @@ -1 +1 @@ -Subproject commit 
d045bb0c65152a0cb32177dfc21148cc13d40fbe +Subproject commit d3e8c390f80adc7a080b7156e444bf22bd20c88e From 2e3540ab437215e6f2717280b90a5233b5e10171 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 29 Apr 2024 21:33:20 -0400 Subject: [PATCH 11/27] Allow to import all `v2` building blocks. --- deploy-python.sh | 7 ++++--- python/config/schema/v2/__init__.py | 18 ++++++++++++++++++ python/tests/test_imports.py | 24 ++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 python/config/schema/v2/__init__.py diff --git a/deploy-python.sh b/deploy-python.sh index 5c5c99dc..005b7de6 100755 --- a/deploy-python.sh +++ b/deploy-python.sh @@ -11,9 +11,9 @@ echo "Building phenopacket distribution files in directory at $DIRECTORY" # Copy the generated files into the target folder cp -r target/generated-sources/protobuf/python/* ${DIRECTORY}/src/ -# Add module file to re-export the v2.0.2 classes to maintain backwards compatibility -# until we remove the classes. -cp ${DIRECTORY}/config/__init__.py ${DIRECTORY}/src/phenopackets/ +# Add module files to re-export the v2.0.2 classes to maintain backwards compatibility, +# and `__init__.py` to allow importing all v2 building blocks at once. +cp -r ${DIRECTORY}/config/* ${DIRECTORY}/src/phenopackets cd $DIRECTORY || { echo "Deployment FAILED. Couldn't find directory" ; exit 1; } createVirtualEnvironment(){ @@ -54,5 +54,6 @@ else fi # Clean up +echo "Cleaning up the build environment and the build files" deactivate rm -rf build dist src/* ${TEMP_DIRECTORY_VIRTUAL_ENV} diff --git a/python/config/schema/v2/__init__.py b/python/config/schema/v2/__init__.py new file mode 100644 index 00000000..f106b263 --- /dev/null +++ b/python/config/schema/v2/__init__.py @@ -0,0 +1,18 @@ +# The __init__ file for placing in `phenopackets.schema.v2` package +# to allow importing all building blocks of the Phenopacket Schema v2, +# including the VRS/VRSATILE elements. 
+ +from .core.base_pb2 import * +from .core.biosample_pb2 import * +from .core.disease_pb2 import * +from .core.genome_pb2 import * +from .core.individual_pb2 import * +from .core.interpretation_pb2 import * +from .core.measurement_pb2 import * +from .core.medical_action_pb2 import * +from .core.meta_data_pb2 import * +from .core.pedigree_pb2 import * +from .core.phenotypic_feature_pb2 import * +from .phenopackets_pb2 import * +from ...vrs.v1.vrs_pb2 import * +from ...vrsatile.v1.vrsatile_pb2 import * diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 6f5958d6..cfe94c5b 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -69,6 +69,30 @@ def test_import_v1(self): p = Phenopacket() assert isinstance(p, Phenopacket) + def test_import_all_v2_building_blocks(self): + import phenopackets.schema.v2 as pps2 + + payload = ( + pps2.OntologyClass, pps2.File, # `.core.base_pb2.py` + pps2.Biosample, # `.core.biosample_pb2.py` + pps2.Disease, # `.core.disease_pb2.py` + # Nothing for the `.core.genome_pb2.py` file + pps2.Individual, # `.core.individual_pb2.py` + pps2.Interpretation, # `.core.interpretation_pb2.py` + pps2.Measurement, # `.core.measurement_pb2.py` + pps2.MedicalAction, # `.core.medical_action_pb2.py` + pps2.MetaData, # `.core.meta_data_pb2.py` + pps2.Pedigree, # `.core.pedigree_pb2.py` + pps2.PhenotypicFeature, # `.core.phenotypic_feature_pb2.py` + pps2.Cohort, pps2.Family, pps2.Phenopacket, # `.phenopackets_pb2.py` + pps2.Allele, # `phenopackets.vrs.v1.vrs_pb2.py` + pps2.VariationDescriptor, # `phenopackets.vrsatile.v1.vrsatile_pb2.py` + ) + + for clz in payload: + x = clz() + assert isinstance(x, clz) + class TestImportVrsatile: From e30ef0ef3cf94206a50e865e8f0e2504dfc692eb Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 29 Apr 2024 23:13:05 -0400 Subject: [PATCH 12/27] Add documentation for Python. 
--- docs/python.rst | 140 ++++++++++++++++++++++++++++++++++++++++++ docs/working.rst | 1 + python/pyproject.toml | 2 + 3 files changed, 143 insertions(+) create mode 100644 docs/python.rst diff --git a/docs/python.rst b/docs/python.rst new file mode 100644 index 00000000..8bd17859 --- /dev/null +++ b/docs/python.rst @@ -0,0 +1,140 @@ +.. _rstpython: + +################################### +Working with Phenopackets in Python +################################### + +Similarly to :ref:`Java `, the :ref:`Phenopacket Schema ` can be considered the source of truth +for the specification, and the JSON produced by an arbitrary implementation can be used to inter-operate +with other services. Nevertheless, we **strongly** suggest using the `phenopackets` library available +from the Python Package Index (PyPI) or the Python bindings generated by the Protobuf compiler from the Protobuf files. + +Here we provide a brief overview of the `phenopackets` library. + + +Install `phenopackets` into your Python environment +*************************************************** + +The `phenopackets` package can be installed from PyPI by running: + +.. code-block:: shell + + python3 -m pip install phenopackets + +We use `pip` to install `phenopackets` and the required libraries/dependencies. + + +Create building blocks programmatically +*************************************** + +Let's start by importing all building blocks of Phenopacket Schema v2: + +>>> import phenopackets.schema.v2 as pps2 + +Now we can access all building blocks of v2 Phenopacket Schema via the `pps2` alias. 
+ +For instance, we can create an :ref:`Ontology class ` that corresponds to a Human Phenotype Ontology +term for *Spherocytosis* (`HP:0004444`): + +>>> spherocytosis = pps2.OntologyClass(id='HP:0004444', label='Spherocytosis') +>>> spherocytosis # doctest: +NORMALIZE_WHITESPACE + id: "HP:0004444" + label: "Spherocytosis" + +All schema building blocks, including `OntologyClass`, are available under the `pps2` alias, and can be created with constructors that accept key/value arguments. +The constructors do not allow passing arbitrary attributes: + +>>> pps2.OntologyClass(foo='bar') +Traceback (most recent call last): + ... +ValueError: Protocol message OntologyClass has no "foo" field. + +We do not have to provide all attributes at creation time and we can set the fields sequentially +using Python property syntax, to achieve the same outcome: + +>>> spherocytosis2 = pps2.OntologyClass() +>>> spherocytosis2.id = 'HP:0004444' +>>> spherocytosis2.label = 'Spherocytosis' +>>> spherocytosis == spherocytosis2 +True + +However, setting the field values with property syntax only works for +`singular `_ (non-message) fields, +such as `bool`, `int`, `str`, or `float`, and the assignment will *NOT* work for message fields: + +>>> pf = pps2.PhenotypicFeature() +>>> pf.type = spherocytosis +Traceback (most recent call last): + ... +AttributeError: Assignment not allowed to field "type" in protocol message object. + +To set a message field, we must use the `CopyFrom` function: + +>>> pf.type.CopyFrom(spherocytosis) +>>> pf # doctest: +NORMALIZE_WHITESPACE + type { + id: "HP:0004444" + label: "Spherocytosis" + } + +Last, a repeated field can be set using list-like semantics: + +>>> modifiers = ( +... pps2.OntologyClass(id='HP:0003623', label='Neonatal onset'), +... pps2.OntologyClass(id='HP:0011010', label='Chronic'), +... 
) +>>> pf.modifiers.extend(modifiers) +>>> pf # doctest: +NORMALIZE_WHITESPACE + type { + id: "HP:0004444" + label: "Spherocytosis" + } + modifiers { + id: "HP:0003623" + label: "Neonatal onset" + } + modifiers { + id: "HP:0011010" + label: "Chronic" + } + +See `Protobuf documentation `_ +for more info. + + +Building blocks I/O +******************* + +Having an instance with data, we can write the content into Protobuf's wire format: + +>>> binary_str = pf.SerializeToString() +>>> binary_str +b'\x12\x1b\n\nHP:0004444\x12\rSpherocytosis*\x1c\n\nHP:0003623\x12\x0eNeonatal onset*\x15\n\nHP:0011010\x12\x07Chronic' + +and get the same content back: + +>>> pf2 = pps2.PhenotypicFeature() +>>> _ = pf2.ParseFromString(binary_str) +>>> pf == pf2 +True + +We can also dump the content of the building block to a *JSON* string or to a `dict` with Python objects using +`MessageToJson `_ +or `MessageToDict `_ +functions: + +>>> from google.protobuf.json_format import MessageToDict +>>> json_dict = MessageToDict(pf) +>>> json_dict +{'type': {'id': 'HP:0004444', 'label': 'Spherocytosis'}, 'modifiers': [{'id': 'HP:0003623', 'label': 'Neonatal onset'}, {'id': 'HP:0011010', 'label': 'Chronic'}]} + +We complete the JSON round-trip using +`Parse `_ +or `ParseDict `_ +functions: + +>>> from google.protobuf.json_format import ParseDict +>>> pf2 = ParseDict(json_dict, pps2.PhenotypicFeature()) +>>> pf == pf2 +True + diff --git a/docs/working.rst b/docs/working.rst index 3122073a..bef5e6bd 100644 --- a/docs/working.rst +++ b/docs/working.rst @@ -20,6 +20,7 @@ produced as part of the build (:ref:`rstjavabuild`). 
:maxdepth: 1 Working with Phenopackets in Java + Working with Phenopackets in Python Working with Phenopackets in C++ Security disclaimer diff --git a/python/pyproject.toml b/python/pyproject.toml index 7c4dd2fa..f3cab230 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -53,4 +53,6 @@ package-dir = { "" = "src" } [tool.pytest.ini_options] testpaths = [ "tests", + "../docs", ] +addopts = "--doctest-modules --doctest-glob=\"*.rst\"" From 71adade0a7ffc13273be57877571960a2049d9d2 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 6 May 2024 11:49:23 -0400 Subject: [PATCH 13/27] Update Maven due to `protobuf-maven-plugin`. --- .mvn/wrapper/maven-wrapper.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties index d26693d3..34dff68d 100644 --- a/.mvn/wrapper/maven-wrapper.properties +++ b/.mvn/wrapper/maven-wrapper.properties @@ -1 +1 @@ -distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.8.1/apache-maven-3.8.1-bin.zip \ No newline at end of file +distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.9.6/apache-maven-3.9.6-bin.zip \ No newline at end of file From ef66fcd8095574d67cd67c9fd28fe606661565a2 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 6 May 2024 11:50:05 -0400 Subject: [PATCH 14/27] Use `io.github.ascopes:protobuf-maven-plugin` instead of xolstice to enable generating Python type stubs. 
--- pom.xml | 58 +++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/pom.xml b/pom.xml index 0ee8b212..276dbf08 100644 --- a/pom.xml +++ b/pom.xml @@ -169,33 +169,55 @@ - - - kr.motd.maven - os-maven-plugin - 1.6.0 - - - org.xolstice.maven.plugins + io.github.ascopes protobuf-maven-plugin - 0.6.1 - true + 1.2.1 + - com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} + ${protobuf.version} + + src/main/proto + + + false + generate-java + + generate + + + true + ${project.build.directory}/generated-sources/java + + + + generate-python - compile - test-compile - compile-python - test-compile-python - compile-cpp - test-compile-cpp - compile-js + generate + + true + true + ${project.build.directory}/generated-sources/python + + + + generate-cpp + + generate + + + true + ${project.build.directory}/generated-sources/cpp + From 7ef57c1ed6081dc1a646e17a855940ae58c9a089 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 6 May 2024 11:50:15 -0400 Subject: [PATCH 15/27] Ignore `*.pyi` files. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8888f6a6..f6c4b6c3 100644 --- a/.gitignore +++ b/.gitignore @@ -129,6 +129,7 @@ nb-configuration.xml # We do not track the generated Protobuf files for now. python/**/*_pb2.py +python/**/*_pb2.pyi # Byte-compiled / optimized / DLL files __pycache__/ From f3a9e24c84fd6d384cf7226f4b05a3376069caad Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 6 May 2024 13:07:39 -0400 Subject: [PATCH 16/27] Use Maven to manage the lifecycle of the Python protobuf bindings. 
--- deploy-python.sh | 12 +++----- pom.xml | 30 +++++++++++-------- .../{config => src/phenopackets}/__init__.py | 0 .../phenopackets}/schema/v2/__init__.py | 0 4 files changed, 22 insertions(+), 20 deletions(-) rename python/{config => src/phenopackets}/__init__.py (100%) rename python/{config => src/phenopackets}/schema/v2/__init__.py (100%) diff --git a/deploy-python.sh b/deploy-python.sh index 005b7de6..27e303ae 100755 --- a/deploy-python.sh +++ b/deploy-python.sh @@ -7,13 +7,7 @@ DIRECTORY=./python echo "Building phenopacket distribution files in directory at $DIRECTORY" # Ensure we generated the protobuf Python files. -./mvnw clean package - -# Copy the generated files into the target folder -cp -r target/generated-sources/protobuf/python/* ${DIRECTORY}/src/ -# Add module files to re-export the v2.0.2 classes to maintain backwards compatibility, -# and `__init__.py` to allow importing all v2 building blocks at once. -cp -r ${DIRECTORY}/config/* ${DIRECTORY}/src/phenopackets +./mvnw clean compile cd $DIRECTORY || { echo "Deployment FAILED. Couldn't find directory" ; exit 1; } createVirtualEnvironment(){ @@ -56,4 +50,6 @@ fi # Clean up echo "Cleaning up the build environment and the build files" deactivate -rm -rf build dist src/* ${TEMP_DIRECTORY_VIRTUAL_ENV} +rm -rf build dist ${TEMP_DIRECTORY_VIRTUAL_ENV} +cd .. 
+./mvnw clean diff --git a/pom.xml b/pom.xml index 276dbf08..2ba4d789 100644 --- a/pom.xml +++ b/pom.xml @@ -53,6 +53,7 @@ 3.20.3 2.15.2 5.7.2 + ${project.basedir}/python/src @@ -195,7 +196,6 @@ true - ${project.build.directory}/generated-sources/java @@ -206,17 +206,7 @@ true true - ${project.build.directory}/generated-sources/python - - - - generate-cpp - - generate - - - true - ${project.build.directory}/generated-sources/cpp + ${python.src} @@ -274,6 +264,22 @@ + + org.apache.maven.plugins + maven-clean-plugin + 3.3.2 + + + + + ${python.src} + + **/__init__.py + + + + + diff --git a/python/config/__init__.py b/python/src/phenopackets/__init__.py similarity index 100% rename from python/config/__init__.py rename to python/src/phenopackets/__init__.py diff --git a/python/config/schema/v2/__init__.py b/python/src/phenopackets/schema/v2/__init__.py similarity index 100% rename from python/config/schema/v2/__init__.py rename to python/src/phenopackets/schema/v2/__init__.py From 527bc724f531cdce19d1429b845b36f0728c16ad Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:04:00 +0200 Subject: [PATCH 17/27] Ignore VS Code files. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index f6c4b6c3..4617a216 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,9 @@ buildNumber.properties # Avoid ignoring Maven wrapper jar file (.jar files are usually ignored) !maven-wrapper.jar +# VS Code files +.vscode/ + ### JetBrains template # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio From 60010d1bcab1b84bd43d92b28ca48c39d76e677d Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:04:52 +0200 Subject: [PATCH 18/27] Upgrade protobuf plugin, embed protobuf source files in JAR. 
--- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2ba4d789..bf8d7d40 100644 --- a/pom.xml +++ b/pom.xml @@ -174,7 +174,7 @@ io.github.ascopes protobuf-maven-plugin - 1.2.1 + 2.2.3 ${protobuf.version} @@ -187,6 +187,7 @@ to prevent generating Java classes into the other language's folder. --> false + true From 6ce7f95fc6b8edf22e9e8eb4d3a338ee8829bf29 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:08:36 +0200 Subject: [PATCH 19/27] Add CI to run Python tests. --- .github/workflows/python.yml | 45 ++++++++++++++++++++++++++++++++++++ docs/python.rst | 2 +- 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/python.yml diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 00000000..b025f255 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,45 @@ +# This workflow will generate Python protobuf bindings and type stubs with Maven and run Python tests. 
+ +name: Python CI with Maven and Pytest + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + workflow_dispatch: + +jobs: + run-python-ci: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Build with Maven + run: ./mvnw -B package -DskipTests # we run tests elsewhere + + - name: Install Python bindings + run: | + cd python && python3 -m pip install .[test] + + - name: Run Python tests + run: | + pytest + \ No newline at end of file diff --git a/docs/python.rst b/docs/python.rst index 8bd17859..a54316f3 100644 --- a/docs/python.rst +++ b/docs/python.rst @@ -66,7 +66,7 @@ such as `bool`, `int`, `str`, or `float`, and the assignment will *NOT* work for >>> pf.type = spherocytosis Traceback (most recent call last): ... -AttributeError: Assignment not allowed to field "type" in protocol message object. +AttributeError: Assignment not allowed to composite field "type" in protocol message object. To set a message field, we must use the `CopyFrom` function: From 976bfe352469f6033d45b390dc8c7d854490a733 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:35:52 +0200 Subject: [PATCH 20/27] Checkout `d045bb0` in vrs-protobuf. 
--- src/vrs-protobuf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vrs-protobuf b/src/vrs-protobuf index d3e8c390..d045bb0c 160000 --- a/src/vrs-protobuf +++ b/src/vrs-protobuf @@ -1 +1 @@ -Subproject commit d3e8c390f80adc7a080b7156e444bf22bd20c88e +Subproject commit d045bb0c65152a0cb32177dfc21148cc13d40fbe From b0cc680a06fdf227939014089457b08bce7928d0 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:41:43 +0200 Subject: [PATCH 21/27] Only collect Python tests for now. --- .github/workflows/python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index b025f255..cb68c630 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -41,5 +41,5 @@ jobs: - name: Run Python tests run: | - pytest + pytest --collect-only \ No newline at end of file From b1aa56d0b1a84d92196ba59faaad669336891ae8 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:44:17 +0200 Subject: [PATCH 22/27] Change to `python` before running Python tests. --- .github/workflows/python.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index cb68c630..6d6efa0a 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -32,7 +32,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Build with Maven + - name: Generate Python bindings with Maven run: ./mvnw -B package -DskipTests # we run tests elsewhere - name: Install Python bindings @@ -41,5 +41,6 @@ jobs: - name: Run Python tests run: | - pytest --collect-only + pwd + cd python && pytest --collect-only \ No newline at end of file From 9cf2f7e84be9c9ed544cb3f77c8e6d9525ca2282 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:45:44 +0200 Subject: [PATCH 23/27] Now run the Python tests. 
--- .github/workflows/python.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 6d6efa0a..7727824f 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -41,6 +41,5 @@ jobs: - name: Run Python tests run: | - pwd - cd python && pytest --collect-only + cd python && pytest \ No newline at end of file From 732764bd84f992f98da84a86edaa293dd02fbfe8 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:50:37 +0200 Subject: [PATCH 24/27] The exception message differs across Python versions. However, we do not care as long as we get the expected exception type. --- docs/python.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/python.rst b/docs/python.rst index a54316f3..0b0b8cea 100644 --- a/docs/python.rst +++ b/docs/python.rst @@ -63,7 +63,7 @@ However, setting the field values with property syntax only works for such as `bool`, `int`, `str`, or `float`, and the assignment will *NOT* work for message fields: >>> pf = pps2.PhenotypicFeature() ->>> pf.type = spherocytosis +>>> pf.type = spherocytosis # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... AttributeError: Assignment not allowed to composite field "type" in protocol message object. From 4c6fdd71524e2024a47fd24de13656a745ffa2cd Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:53:44 +0200 Subject: [PATCH 25/27] Update `maven-dependency-submission-action`. 
--- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index fcc9a00a..14e335ee 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -34,4 +34,4 @@ jobs: # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive - name: Update dependency graph - uses: advanced-security/maven-dependency-submission-action@571e99aab1055c2e71a1e2309b9691de18d6b7d6 + uses: advanced-security/maven-dependency-submission-action@v4.0.3 From 95a47c12d0918fea9f5f2003f25f62d2c2fc8198 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 12:56:03 +0200 Subject: [PATCH 26/27] Revert the `maven-dependency-submission-action` version. --- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 14e335ee..fcc9a00a 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -34,4 +34,4 @@ jobs: # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive - name: Update dependency graph - uses: advanced-security/maven-dependency-submission-action@v4.0.3 + uses: advanced-security/maven-dependency-submission-action@571e99aab1055c2e71a1e2309b9691de18d6b7d6 From 7372760bb5d668695065f6bf13ace93bfb15085f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Jul 2024 14:04:19 +0200 Subject: [PATCH 27/27] Mention the limitations of VRS objects in Phenopacket Schema v2. --- docs/variant.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/variant.rst b/docs/variant.rst index e29a88c8..cf2d3a92 100644 --- a/docs/variant.rst +++ b/docs/variant.rst @@ -109,6 +109,13 @@ Variation be it a genomic, transcript or protein variation. 
VRS also provides mechanisms for representing haplotypes and systemic variation such as Copy Number Variants (CNVs). +.. note:: + + When introduced in Phenopacket Schema v2, a protobuf version of VRS (github.com/ga4gh/vrs-protobuf) + was derived from the source VRS representation in JSON schema and used for phenopackets. + The `vrs-protobuf` message structure is losslessly transformable but syntactically distinct + from the native VRS JSON schema. + .. _rstvcfrecord: VcfRecord