From 1523801a8207760c020bdf09ab9be769f14c304f Mon Sep 17 00:00:00 2001 From: Sophia Castellarin Date: Wed, 8 Jan 2025 07:27:09 -0800 Subject: [PATCH] Remove redundant channel column (#1010) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .gitignore | 3 + .../_internal/alembic/script.py.mako | 6 +- ...6129_remove_conda_package_build_channel.py | 150 +++++++++++++ .../conda_store_server/_internal/orm.py | 9 - conda-store-server/environment-dev.yaml | 1 + conda-store-server/pyproject.toml | 1 + .../tests/_internal/alembic/__init__.py | 3 + .../_internal/alembic/version/__init__.py | 3 + ...6129_remove_conda_package_build_channel.py | 203 ++++++++++++++++++ .../tests/_internal/test_orm.py | 91 +++----- conda-store-server/tests/conftest.py | 9 + .../conda-store/references/database.md | 4 +- 12 files changed, 402 insertions(+), 81 deletions(-) create mode 100644 conda-store-server/conda_store_server/_internal/alembic/versions/89637f546129_remove_conda_package_build_channel.py create mode 100644 conda-store-server/tests/_internal/alembic/__init__.py create mode 100644 conda-store-server/tests/_internal/alembic/version/__init__.py create mode 100644 conda-store-server/tests/_internal/alembic/version/test_89637f546129_remove_conda_package_build_channel.py diff --git a/.gitignore b/.gitignore index 0ec82f551..75e7762e7 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,6 @@ yarn-error.log* conda-store.sqlite *.lockb + +# generated test assets +conda-store-server/tests/alembic.ini diff --git a/conda-store-server/conda_store_server/_internal/alembic/script.py.mako b/conda-store-server/conda_store_server/_internal/alembic/script.py.mako index ac4bee237..34b973937 100644 --- a/conda-store-server/conda_store_server/_internal/alembic/script.py.mako +++ b/conda-store-server/conda_store_server/_internal/alembic/script.py.mako @@ -1,6 +1,6 @@ -// Copyright (c) conda-store development team. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +# Copyright (c) conda-store development team. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. """${message} diff --git a/conda-store-server/conda_store_server/_internal/alembic/versions/89637f546129_remove_conda_package_build_channel.py b/conda-store-server/conda_store_server/_internal/alembic/versions/89637f546129_remove_conda_package_build_channel.py new file mode 100644 index 000000000..cfecde382 --- /dev/null +++ b/conda-store-server/conda_store_server/_internal/alembic/versions/89637f546129_remove_conda_package_build_channel.py @@ -0,0 +1,150 @@ +# Copyright (c) conda-store development team. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +"""remove conda package build channel + +Revision ID: 89637f546129 +Revises: bf065abf375b +Create Date: 2024-12-04 13:09:25.562450 + +""" +from alembic import op +from sqlalchemy import Column, INTEGER, String, ForeignKey, table, select + + +# revision identifiers, used by Alembic. +revision = '89637f546129' +down_revision = 'bf065abf375b' +branch_labels = None +depends_on = None + +# Due to the issue fixed in https://github.com/conda-incubator/conda-store/pull/961 +# many conda_package_build entries have the wrong package entry (but the right channel). 
+# Because the packages are duplicated, we can not recreate the _conda_package_build_uc +# constraint without the channel_id. +# So, this function will go thru each conda_package_build and re-associate it with the +# correct conda_package based on the channel id. +def fix_misrepresented_packages(conn): + # conda_packages is a hash of channel-id_name_version to conda_package id + conda_packages = {} + + # dummy tables to run queries against + conda_package_build_table = table( + "conda_package_build", + Column("id", INTEGER), + Column("channel_id", INTEGER), + Column("package_id", INTEGER, ForeignKey("conda_package.id")), + ) + conda_package_table = table( + "conda_package", + Column("id", INTEGER), + Column("channel_id", INTEGER), + Column("name", String), + Column("version", String), + ) + + def get_conda_package_id(conn, channel_id, name, version): + hashed_name = f"{channel_id}-{name}-{version}" + + # if package exists in the conda_packages dict, return it + if hashed_name in conda_packages: + return conda_packages[hashed_name] + + # if not, then query the db for the package + package = conn.execute( + select(conda_package_table).where( + conda_package_table.c.channel_id == channel_id, + conda_package_table.c.name == name, + conda_package_table.c.version == version, + ) + ).first() + + # save the package into the conda_packages dict + conda_packages[hashed_name] = package.id + return package.id + + for row in conn.execute( + select( + conda_package_build_table.c.id, + conda_package_build_table.c.package_id, + conda_package_build_table.c.channel_id, + conda_package_table.c.name, + conda_package_table.c.version + ).join( + conda_package_build_table, + conda_package_build_table.c.package_id == conda_package_table.c.id + ) + ): + # the channel_id might already be empty + if row[2] is None: + continue + + package_id = get_conda_package_id(conn, row[2], row[3], row[4]) + # if found package id does not match the found package id, we'll need to updated it + if package_id != row[1]: + update_package_query = conda_package_build_table.update().where( + conda_package_build_table.c.id == op.inline_literal(row[0]) + ).values( + {"package_id": op.inline_literal(package_id)} + ) + conn.execute(update_package_query) + conn.commit() + +def upgrade(): + bind = op.get_bind() + + # So, go thru each conda_package_build and re-associate it with the correct conda_package + # based on the channel id. 
+ fix_misrepresented_packages(bind) + + with op.batch_alter_table("conda_package_build") as batch_op: + # remove channel column from constraints + batch_op.drop_constraint( + "_conda_package_build_uc", + ) + + # re-add the constraint without the channel column + batch_op.create_unique_constraint( + "_conda_package_build_uc", + [ + "package_id", + "subdir", + "build", + "build_number", + "sha256", + ], + ) + + # remove channel column + batch_op.drop_column( + "channel_id", + ) + + +def downgrade(): + with op.batch_alter_table("conda_package_build") as batch_op: + # remove channel column from constraints + batch_op.drop_constraint( + constraint_name="_conda_package_build_uc", + ) + + # add channel column + batch_op.add_column( + Column("channel_id", INTEGER) + ) + + batch_op.create_foreign_key("fk_channel_id", "conda_channel", ["channel_id"], ["id"]) + + # re-add the constraint with the channel column + batch_op.create_unique_constraint( + constraint_name="_conda_package_build_uc", + columns=[ + "channel_id", + "package_id", + "subdir", + "build", + "build_number", + "sha256", + ], + ) diff --git a/conda-store-server/conda_store_server/_internal/orm.py b/conda-store-server/conda_store_server/_internal/orm.py index b3eb0879e..e3eb73c9a 100644 --- a/conda-store-server/conda_store_server/_internal/orm.py +++ b/conda-store-server/conda_store_server/_internal/orm.py @@ -757,7 +757,6 @@ class CondaPackageBuild(Base): __table_args__ = ( UniqueConstraint( - "channel_id", "package_id", "subdir", "build", @@ -772,14 +771,6 @@ class CondaPackageBuild(Base): package_id: Mapped[int] = mapped_column(ForeignKey("conda_package.id")) package: Mapped["CondaPackage"] = relationship(back_populates="builds") - """ - Some package builds have the exact same data from different channels. - Thus, when adding a channel, populating CondaPackageBuild can encounter - duplicate keys errors. That's why we need to distinguish them by channel_id. - """ - channel_id: Mapped[int] = mapped_column(ForeignKey("conda_channel.id")) - channel: Mapped["CondaChannel"] = relationship(CondaChannel) - build: Mapped[str] = mapped_column(Unicode(64), index=True) build_number: Mapped[int] constrains: Mapped[dict] = mapped_column(JSON) diff --git a/conda-store-server/environment-dev.yaml b/conda-store-server/environment-dev.yaml index 974f0465a..f7fa54a2e 100644 --- a/conda-store-server/environment-dev.yaml +++ b/conda-store-server/environment-dev.yaml @@ -22,6 +22,7 @@ dependencies: - pytest-celery - pytest-mock - pytest-cov + - pytest-alembic - docker-py<=7 - docker-compose # build dependencies diff --git a/conda-store-server/pyproject.toml b/conda-store-server/pyproject.toml index cc24b1c1f..8db52c10f 100644 --- a/conda-store-server/pyproject.toml +++ b/conda-store-server/pyproject.toml @@ -98,6 +98,7 @@ dependencies = [ "pytest", "pytest-celery", "pytest-playwright", + "pytest-alembic", "twine>=5.0.0", "pkginfo>=1.10", # Needed to support metadata 2.3 "pytest-cov", diff --git a/conda-store-server/tests/_internal/alembic/__init__.py b/conda-store-server/tests/_internal/alembic/__init__.py new file mode 100644 index 000000000..b559bd2ff --- /dev/null +++ b/conda-store-server/tests/_internal/alembic/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) conda-store development team. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
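Reviewer note: with `channel_id` dropped from `CondaPackageBuild`, any query that used to filter builds by that column now reaches the channel through the parent `CondaPackage`, which is the pattern the updated tests later in this patch switch to. A minimal sketch of the new query shape (the SQLite URL and session setup here are illustrative assumptions, not part of this patch):

```python
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from conda_store_server._internal import orm

# Illustrative engine; conda-store normally supplies the URL via its configuration.
engine = create_engine("sqlite:///conda-store.sqlite")

with Session(engine) as db:
    # Old shape (pre-patch): filter directly on the build's own channel_id column.
    # db.query(orm.CondaPackageBuild).filter(orm.CondaPackageBuild.channel_id == 1)

    # New shape: join to the parent package and filter on its channel_id.
    builds = (
        db.query(orm.CondaPackageBuild)
        .join(orm.CondaPackage)
        .filter(orm.CondaPackage.channel_id == 1)
        .all()
    )
    for build in builds:
        # A build's channel is now derived solely from its package.
        print(build.id, build.package.channel_id)
```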
diff --git a/conda-store-server/tests/_internal/alembic/version/__init__.py b/conda-store-server/tests/_internal/alembic/version/__init__.py new file mode 100644 index 000000000..b559bd2ff --- /dev/null +++ b/conda-store-server/tests/_internal/alembic/version/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) conda-store development team. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. diff --git a/conda-store-server/tests/_internal/alembic/version/test_89637f546129_remove_conda_package_build_channel.py b/conda-store-server/tests/_internal/alembic/version/test_89637f546129_remove_conda_package_build_channel.py new file mode 100644 index 000000000..35c8d7b8e --- /dev/null +++ b/conda-store-server/tests/_internal/alembic/version/test_89637f546129_remove_conda_package_build_channel.py @@ -0,0 +1,203 @@ +# Copyright (c) conda-store development team. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +import pytest +from sqlalchemy import text +from sqlalchemy.exc import OperationalError + +from conda_store_server import api +from conda_store_server._internal import orm + + +def setup_bad_data_db(conda_store): + """A database fixture populated with + * 2 channels + * 2 conda packages + * 5 conda package builds + """ + with conda_store.session_factory() as db: + # create test channels + api.create_conda_channel(db, "test-channel-1") + api.create_conda_channel(db, "test-channel-2") + db.commit() + + # create some sample conda_package's + # For simplicity, the channel_id's match the id of the conda_package. + # So, when checking that the package build entries have been reassembled + # the right way, check that the package_id in the conda_package_build is + # equal to what would have been the channel_id (before the migration is run) + conda_package_records = [ + { + "id": 1, + "channel_id": 1, + "name": "test-package-1", + "version": "1.0.0", + }, + { + "id": 2, + "channel_id": 2, + "name": "test-package-1", + "version": "1.0.0", + }, + ] + for cpb in conda_package_records: + conda_package = orm.CondaPackage(**cpb) + db.add(conda_package) + db.commit() + + # create some conda_package_build's + conda_package_builds = [ + { + "id": 1, + "build": "py310h06a4308_0", + "package_id": 1, + "build_number": 0, + "sha256": "one", + "subdir": "linux-64", + }, + { + "id": 2, + "build": "py311h06a4308_0", + "package_id": 1, + "build_number": 0, + "sha256": "two", + "subdir": "linux-64", + }, + { + "id": 3, + "build": "py38h06a4308_0", + "package_id": 1, + "build_number": 0, + "sha256": "three", + "subdir": "linux-64", + }, + { + "id": 4, + "build": "py39h06a4308_0", + "package_id": 2, + "build_number": 0, + "sha256": "four", + "subdir": "linux-64", + }, + { + "id": 5, + "build": "py310h06a4308_0", + "package_id": 2, + "build_number": 0, + "sha256": "five", + "subdir": "linux-64", + }, + ] + default_values = { + "depends": "", + "md5": "", + "timestamp": 0, + "constrains": "", + "size": 0, + } + for cpb in conda_package_builds: + conda_package = orm.CondaPackageBuild(**cpb, **default_values) + db.add(conda_package) + db.commit() + + # force in some channel data + # conda_package_build 1 should have package_id 2 after migration + db.execute(text("UPDATE conda_package_build SET channel_id=2 WHERE id=1")) + # conda_package_build 2 should have package_id 1 after migration + db.execute(text("UPDATE conda_package_build SET channel_id=1 WHERE id=2")) + # conda_package_build 
3 should have package_id 1 after migration + db.execute(text("UPDATE conda_package_build SET channel_id=1 WHERE id=3")) + # conda_package_build 4 should have package_id 2 after migration + db.execute(text("UPDATE conda_package_build SET channel_id=2 WHERE id=4")) + + # don't set conda_package_build 5 channel_id as a test case + # conda_package_build 5 package_id should be unchanged (2) after migration + + db.commit() + + +def test_remove_conda_package_build_channel_basic( + conda_store, alembic_config, alembic_runner +): + """Simply run the upgrade and downgrade for this migration""" + # migrate all the way to the target revision + alembic_runner.migrate_up_to("89637f546129") + + # try downgrading + alembic_runner.migrate_down_one() + + # ensure the channel_id column exists, will error if channel_id column does not exist + with conda_store.session_factory() as db: + db.execute(text("SELECT channel_id from conda_package_build")) + + # try upgrading once more + alembic_runner.migrate_up_one() + + # ensure the channel_id column exists, will error if channel_id column does not exist + with conda_store.session_factory() as db: + with pytest.raises(OperationalError): + db.execute(text("SELECT channel_id from conda_package_build")) + + +def test_remove_conda_package_build_bad_data( + conda_store, alembic_config, alembic_runner +): + """Simply run the upgrade and downgrade for this migration""" + # migrate all the way to the target revision + alembic_runner.migrate_up_to("89637f546129") + + # try downgrading + alembic_runner.migrate_down_one() + + # ensure the channel_id column exists, will error if channel_id column does not exist + with conda_store.session_factory() as db: + db.execute(text("SELECT channel_id from conda_package_build")) + + # seed db with data that has broken data + setup_bad_data_db(conda_store) + + # try upgrading once more + alembic_runner.migrate_up_one() + + # ensure the channel_id column exists, will error if channel_id column does not exist + with conda_store.session_factory() as db: + with pytest.raises(OperationalError): + db.execute(text("SELECT channel_id from conda_package_build")) + + # ensure all packages builds have the right package associated + with conda_store.session_factory() as db: + build = ( + db.query(orm.CondaPackageBuild) + .filter(orm.CondaPackageBuild.id == 1) + .first() + ) + assert build.package_id == 2 + + build = ( + db.query(orm.CondaPackageBuild) + .filter(orm.CondaPackageBuild.id == 2) + .first() + ) + assert build.package_id == 1 + + build = ( + db.query(orm.CondaPackageBuild) + .filter(orm.CondaPackageBuild.id == 3) + .first() + ) + assert build.package_id == 1 + + build = ( + db.query(orm.CondaPackageBuild) + .filter(orm.CondaPackageBuild.id == 4) + .first() + ) + assert build.package_id == 2 + + build = ( + db.query(orm.CondaPackageBuild) + .filter(orm.CondaPackageBuild.id == 5) + .first() + ) + assert build.package_id == 2 diff --git a/conda-store-server/tests/_internal/test_orm.py b/conda-store-server/tests/_internal/test_orm.py index d35601be2..ac2e45a73 100644 --- a/conda-store-server/tests/_internal/test_orm.py +++ b/conda-store-server/tests/_internal/test_orm.py @@ -53,7 +53,6 @@ def populated_db(db): 1, { "build": "py310h06a4308_0", - "channel_id": 1, "build_number": 0, "sha256": "11f080b53b36c056dbd86ccd6dc56c40e3e70359f64279b1658bb69f91ae726f", "subdir": "linux-64", @@ -68,7 +67,6 @@ def populated_db(db): 1, { "build": "py311h06a4308_0", - "channel_id": 1, "build_number": 0, "sha256": 
"f0719ee6940402a1ea586921acfaf752fda977dbbba74407856a71ba7f6c4e4a", "subdir": "linux-64", @@ -83,7 +81,6 @@ def populated_db(db): 1, { "build": "py38h06a4308_0", - "channel_id": 1, "build_number": 0, "sha256": "39e39a23baebd0598c1b59ae0e82b5ffd6a3230325da4c331231d55cbcf13b3e", "subdir": "linux-64", @@ -220,14 +217,13 @@ def test_update_packages_add_existing_pkg_new_version( assert count == 2 count = populated_db.query(orm.CondaPackage).count() assert count == 3 - builds = ( + num_builds = ( populated_db.query(orm.CondaPackageBuild) - .filter(orm.CondaPackageBuild.channel_id == 1) - .all() + .join(orm.CondaPackage) + .filter(orm.CondaPackage.channel_id == 1) + .count() ) - assert len(builds) == 4 - for b in builds: - assert b.package.channel_id == 1 + assert num_builds == 4 count = populated_db.query(orm.CondaPackageBuild).count() assert count == 4 @@ -295,14 +291,13 @@ def test_update_packages_multiple_subdirs(mock_repdata, populated_db): .count() ) assert count == 2 - builds = ( + num_builds = ( populated_db.query(orm.CondaPackageBuild) - .filter(orm.CondaPackageBuild.channel_id == 1) - .all() + .join(orm.CondaPackage) + .filter(orm.CondaPackage.channel_id == 1) + .count() ) - assert len(builds) == 5 - for b in builds: - assert b.package.channel_id == 1 + assert num_builds == 5 @mock.patch("conda_store_server._internal.conda_utils.download_repodata") @@ -326,13 +321,15 @@ def check_packages(): assert len(conda_packages) == 1 conda_packages = ( populated_db.query(orm.CondaPackageBuild) - .filter(orm.CondaPackageBuild.channel_id == 1) + .join(orm.CondaPackage) + .filter(orm.CondaPackage.channel_id == 1) .all() ) assert len(conda_packages) == 4 conda_packages = ( populated_db.query(orm.CondaPackageBuild) - .filter(orm.CondaPackageBuild.channel_id == 2) + .join(orm.CondaPackage) + .filter(orm.CondaPackage.channel_id == 2) .all() ) assert len(conda_packages) == 0 @@ -371,14 +368,13 @@ def test_update_packages_new_package_channel(mock_repdata, populated_db, test_re .count() ) assert count == 2 - builds = ( + num_builds = ( populated_db.query(orm.CondaPackageBuild) - .filter(orm.CondaPackageBuild.channel_id == 2) - .all() + .join(orm.CondaPackage) + .filter(orm.CondaPackage.channel_id == 2) + .count() ) - assert len(builds) == 1 - for b in builds: - assert b.package.channel_id == 2 + assert num_builds == 1 count = populated_db.query(orm.CondaPackageBuild).count() assert count == 4 @@ -405,49 +401,10 @@ def test_update_packages_multiple_builds( # ensure it is added to conda package builds count = populated_db.query(orm.CondaPackageBuild).count() assert count == 5 - builds = ( - populated_db.query(orm.CondaPackageBuild) - .filter(orm.CondaPackageBuild.channel_id == 2) - .all() - ) - assert len(builds) == 2 - for b in builds: - assert b.package.channel_id == 2 - - -@mock.patch("conda_store_server._internal.conda_utils.download_repodata") -def test_update_packages_channel_consistency( - mock_repdata, populated_db, test_repodata_multiple_packages -): - mock_repdata.return_value = test_repodata_multiple_packages - - channel = ( - populated_db.query(orm.CondaChannel).filter(orm.CondaChannel.id == 2).first() - ) - channel.update_packages(populated_db, "linux-64") - - # ensure the package builds end up with the correct channel - builds = ( - populated_db.query(orm.CondaPackageBuild) - .filter(orm.CondaPackageBuild.channel_id == 2) - .all() - ) - for b in builds: - assert b.channel_id == 2 - assert b.package.channel_id == 2 - - # again with another channel - channel = ( - 
populated_db.query(orm.CondaChannel).filter(orm.CondaChannel.id == 1).first() - ) - channel.update_packages(populated_db, "linux-64") - - # ensure the package builds end up with the correct channel - builds = ( + num_builds = ( populated_db.query(orm.CondaPackageBuild) - .filter(orm.CondaPackageBuild.channel_id == 1) - .all() + .join(orm.CondaPackage) + .filter(orm.CondaPackage.channel_id == 2) + .count() ) - for b in builds: - assert b.channel_id == 1 - assert b.package.channel_id == 1 + assert num_builds == 2 diff --git a/conda-store-server/tests/conftest.py b/conda-store-server/tests/conftest.py index f1cebe23b..7365b5e48 100644 --- a/conda-store-server/tests/conftest.py +++ b/conda-store-server/tests/conftest.py @@ -290,6 +290,15 @@ def plugin_manager(): return pm +@pytest.fixture +def alembic_config(conda_store): + from conda_store_server._internal.dbutil import write_alembic_ini + + ini_file = pathlib.Path(__file__).parent / "alembic.ini" + write_alembic_ini(ini_file, conda_store.database_url) + return {"file": ini_file} + + def _seed_conda_store( db: Session, conda_store, diff --git a/docusaurus-docs/conda-store/references/database.md b/docusaurus-docs/conda-store/references/database.md index d183b16fe..9c91e1f37 100644 --- a/docusaurus-docs/conda-store/references/database.md +++ b/docusaurus-docs/conda-store/references/database.md @@ -46,7 +46,7 @@ conda-store relies on [SQLAlchemy](https://www.sqlalchemy.org/) for ORM mapping, The procedure to modify the database is the following : - First, modify [the ORM Model](https://github.com/conda-incubator/conda-store/blob/main/conda-store-server/conda_store_server/orm.py) according to the changes you want to make -- edit the file `conda-store-server/alembic.ini` and replace the value for entry `sqlalchemy.url` to match the connection URL of your database. +- edit the file `conda-store-server/conda_store_server/_internal/alembic.ini` and replace the value for entry `sqlalchemy.url` to match the connection URL of your database. For example (when postgres was started via Docker compose): ``` @@ -57,7 +57,7 @@ sqlalchemy.url = postgresql+psycopg2://postgres:password@localhost:5432/conda-st - in your command line, run the following : ```sh -cd conda-store-server/conda_store_server +cd conda-store-server/conda_store_server/_internal alembic revision --autogenerate -m "description of your changes" ```
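Reviewer note: the docs above cover generating a revision; this patch also wires up `pytest-alembic` (see the new `alembic_config` fixture in `conftest.py`), so future revisions can be exercised the same way this migration is tested. A minimal sketch, assuming a hypothetical revision id `abc123def456` produced by `alembic revision --autogenerate`:

```python
from sqlalchemy import text


def test_my_new_revision(conda_store, alembic_config, alembic_runner):
    # Walk the schema up to the new revision, then check that the
    # downgrade/upgrade round trip still works.
    alembic_runner.migrate_up_to("abc123def456")
    alembic_runner.migrate_down_one()
    alembic_runner.migrate_up_one()

    # Schema assertions can then be made through a regular session.
    with conda_store.session_factory() as db:
        db.execute(text("SELECT 1"))
```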