From 42953191074ab310aa8572dac100650936026eb6 Mon Sep 17 00:00:00 2001 From: Trent Smith <1429913+Bento007@users.noreply.github.com> Date: Thu, 13 Jun 2024 12:03:46 -0700 Subject: [PATCH 1/6] chore: unfreeze dev (#7186) --- .github/workflows/build-images-and-create-deployment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-images-and-create-deployment.yml b/.github/workflows/build-images-and-create-deployment.yml index 9a3ae1a7cffc8..fbe35ab94cdd7 100644 --- a/.github/workflows/build-images-and-create-deployment.yml +++ b/.github/workflows/build-images-and-create-deployment.yml @@ -45,7 +45,7 @@ jobs: uses: avakar/create-deployment@v1 # To stop deployment to a specific DEPLOYMENT_STAGE remove it from condition below. # The DEPLOYMENT_STAGE that should be present are dev, stage, prod. - if: env.DEPLOYMENT_STAGE == 'prod' || env.DEPLOYMENT_STAGE == 'stage' + if: env.DEPLOYMENT_STAGE == 'prod' || env.DEPLOYMENT_STAGE == 'stage' || env.DEPLOYMENT_STAGE == 'dev' with: auto_merge: false environment: ${{ env.DEPLOYMENT_STAGE }} From ed8cd5e55924ba8bca13a187542b9fa73f8a563b Mon Sep 17 00:00:00 2001 From: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:14:23 -0400 Subject: [PATCH 2/6] feat: remove auto_publish from migration (#7156) --- .../modules/schema_migration/main.tf | 23 ++--- backend/layers/processing/schema_migration.py | 85 +++++++---------- .../test_collection_migrate.py | 91 +++++++------------ .../test_gather_collections.py | 23 ++--- ...anup.py => test_log_errors_and_cleanup.py} | 55 +++-------- 5 files changed, 91 insertions(+), 186 deletions(-) rename tests/unit/processing/schema_migration/{test_publish_and_cleanup.py => test_log_errors_and_cleanup.py} (71%) diff --git a/.happy/terraform/modules/schema_migration/main.tf b/.happy/terraform/modules/schema_migration/main.tf index aa15694b5c9b5..c10f3904556ea 100644 --- a/.happy/terraform/modules/schema_migration/main.tf +++ b/.happy/terraform/modules/schema_migration/main.tf @@ -180,7 +180,6 @@ resource aws_sfn_state_machine sfn_schema_migration { "Next": "ApplyDefaults", "ResultPath": "$.inputDefaults", "Parameters": { - "auto_publish": "False", "limit_migration": "0" } }, @@ -221,10 +220,6 @@ resource aws_sfn_state_machine sfn_schema_migration { "Name": "EXECUTION_ID", "Value.$": "$$.Execution.Name" }, - { - "Name": "AUTO_PUBLISH", - "Value.$": "$.auto_publish" - }, { "Name": "LIMIT_MIGRATION", "Value.$": "$.limit_migration" @@ -286,10 +281,6 @@ resource aws_sfn_state_machine sfn_schema_migration { "Name": "COLLECTION_VERSION_ID", "Value.$": "$.collection_version_id" }, - { - "Name": "CAN_PUBLISH", - "Value.$": "$.can_publish" - }, { "Name": "TASK_TOKEN", "Value.$": "$$.Task.Token" @@ -314,7 +305,7 @@ resource aws_sfn_state_machine sfn_schema_migration { "States.ALL" ], "ResultPath": null, - "Next": "CollectionPublish" + "Next": "CollectionCleanup" } ] }, @@ -324,17 +315,17 @@ resource aws_sfn_state_machine sfn_schema_migration { { "Variable": "$.key_name", "IsPresent": false, - "Next": "CollectionPublish" + "Next": "CollectionCleanup" } ], "Default": "SpanDatasets" }, - "CollectionPublish": { + "CollectionCleanup": { "Type": "Task", "Resource": "arn:aws:states:::batch:submitJob.sync", "Parameters": { "JobDefinition": "${resource.aws_batch_job_definition.schema_migrations.arn}", - "JobName": "Collection_publish", + "JobName": "Collection_cleanup", "JobQueue": "${var.job_queue_arn}", "Timeout": { "AttemptDurationSeconds": 600 @@ -343,7 +334,7 @@ 
resource aws_sfn_state_machine sfn_schema_migration { "Environment": [ { "Name": "STEP_NAME", - "Value": "collection_publish" + "Value": "collection_cleanup" }, { "Name": "MIGRATE", @@ -497,14 +488,14 @@ resource aws_sfn_state_machine sfn_schema_migration { "Key.$": "$.key_name" } }, - "Next": "CollectionPublish", + "Next": "CollectionCleanup", "MaxConcurrency": 10, "Catch": [ { "ErrorEquals": [ "States.ALL" ], - "Next": "CollectionPublish", + "Next": "CollectionCleanup", "ResultPath": null } ], diff --git a/backend/layers/processing/schema_migration.py b/backend/layers/processing/schema_migration.py index 5ac14a516580c..edfddc8dcb89d 100644 --- a/backend/layers/processing/schema_migration.py +++ b/backend/layers/processing/schema_migration.py @@ -43,53 +43,35 @@ def fetch_collections(self) -> Iterable[CollectionVersion]: unpublished_collections = [*self.business_logic.get_collections(CollectionQueryFilter(is_published=False))] return itertools.chain(unpublished_collections, published_collections) - def gather_collections(self, auto_publish: bool) -> Tuple[Dict[str, str], Dict[str, str]]: + def gather_collections(self) -> Tuple[Dict[str, str], Dict[str, str]]: """ This function is used to gather all the collections and their datasets that will be migrated A json file is created and uploaded to S3 with the list of collections and datasets that will be migrated. It has the following structure: [ - {"can_publish": "true", "collection_id": "", "collection_version_id": - ""}, - {"can_publish": "false", "collection_id": "", "collection_version_id": - ""} + {"collection_id": "", "collection_version_id": ""}, + {"collection_id": "", "collection_version_id": ""} ... ] - :param auto_publish: bool - if False, coerce can_publish to False for all collections. if True, determine - can_publish on collection-by-collection basis based on business logic - :return: the response retuned to the step function and the list of collections to be migrated + :return: the response returned to the step function and the list of collections to be migrated """ response_for_span_collections = [] has_migration_revision = set() # iterates over unpublished collections first, so published versions are skipped if there is an active revision for collection in self.fetch_collections(): - _resp = {} if collection.is_published() and collection.collection_id.id in has_migration_revision: continue - if collection.is_published(): - # published collection without an active revision - _resp["can_publish"] = str(True) - elif collection.is_unpublished_version(): - # active revision of a published collection. - _resp["can_publish"] = str(False) - if collection.is_auto_version: - has_migration_revision.add(collection.collection_id.id) # migration revision found, skip published - _resp["can_publish"] = str(True) - elif collection.is_initial_unpublished_version(): - # unpublished collection - _resp["can_publish"] = str(False) - - if not auto_publish: - # auto_publish is off for this migration, overwrite "can_publish" as false in all cases. 
-                _resp["can_publish"] = str(False)
-            _resp.update(
-                collection_id=collection.collection_id.id,
-                collection_version_id=collection.version_id.id,
-                execution_id=self.execution_id,
-            )
+            if collection.is_auto_version:
+                has_migration_revision.add(collection.collection_id.id)  # migration revision found, skip published
+
+            _resp = {
+                "collection_id": collection.collection_id.id,
+                "collection_version_id": collection.version_id.id,
+                "execution_id": self.execution_id,
+            }
             response_for_span_collections.append(_resp)

         # For testing purposes, only migrate a randomly sampled subset of the collections gathered
@@ -139,16 +121,17 @@ def dataset_migrate(
         }

     def collection_migrate(
-        self, collection_id: str, collection_version_id: str, can_publish: bool
+        self,
+        collection_id: str,
+        collection_version_id: str,
     ) -> Tuple[Dict[str, str], Dict[str, str], List[Dict[str, str]]]:
         """
         This function is used to migrate a collection and its datasets to the latest schema version.

         :param collection_id: the canonical collection id
         :param collection_version_id: the collection version to migrate
-        :param can_publish: if True, the collection will be published after migration
-        :return: the response retuned to the step function, the response for the publish_and_cleanup step function, and
-        the list of datasets to be migrated
+        :return: the response returned to the step function, the response for the log_errors_and_cleanup step function,
+        and the list of datasets to be migrated
         """
         # Get datasets from collection
         version = self.business_logic.get_collection_version(CollectionVersionId(collection_version_id))
@@ -167,9 +150,7 @@ def collection_migrate(
                 "All datasets in the collection have been migrated", extra={"dataset_count": len(version.datasets)}
             )
             response_for_dataset_migrate = []
-            response_for_publish_and_cleanup = {
-                "can_publish": str(False),  # skip publishing, because the collection is already published and no
-                # revision is created, or the collection is private or a revision.
+ response_for_log_errors_and_cleanup = { "collection_version_id": collection_version_id, } response_for_sfn = {"collection_version_id": collection_version_id} @@ -195,30 +176,31 @@ def collection_migrate( if dataset.status.processing_status == DatasetProcessingStatus.SUCCESS # Filter out datasets that are not successfully processed ] - response_for_publish_and_cleanup = { - "can_publish": str(can_publish), + response_for_log_errors_and_cleanup = { "collection_version_id": private_collection_version_id, } response_for_sfn = {"collection_version_id": private_collection_version_id} - response_for_publish_and_cleanup["datasets"] = response_for_dataset_migrate - response_for_publish_and_cleanup["collection_url"] = collection_url + response_for_log_errors_and_cleanup["datasets"] = response_for_dataset_migrate + response_for_log_errors_and_cleanup["collection_url"] = collection_url response_for_sfn["execution_id"] = self.execution_id - self._store_sfn_response("publish_and_cleanup", version.collection_id.id, response_for_publish_and_cleanup) + self._store_sfn_response( + "log_errors_and_cleanup", version.collection_id.id, response_for_log_errors_and_cleanup + ) if response_for_dataset_migrate: key_name = self._store_sfn_response("span_datasets", version.collection_id.id, response_for_dataset_migrate) response_for_sfn["key_name"] = key_name - return (response_for_sfn, response_for_publish_and_cleanup, response_for_dataset_migrate) + return (response_for_sfn, response_for_log_errors_and_cleanup, response_for_dataset_migrate) - def publish_and_cleanup(self, collection_version_id: str) -> list: + def log_errors_and_cleanup(self, collection_version_id: str) -> list: errors = [] collection_version = self.business_logic.get_collection_version(CollectionVersionId(collection_version_id)) object_keys_to_delete = [] # Get the datasets that were processed - extra_info = self._retrieve_sfn_response("publish_and_cleanup", collection_version.collection_id.id) + extra_info = self._retrieve_sfn_response("log_errors_and_cleanup", collection_version.collection_id.id) processed_datasets = {d["dataset_id"]: d["dataset_version_id"] for d in extra_info["datasets"]} # Process datasets errors @@ -275,8 +257,6 @@ def publish_and_cleanup(self, collection_version_id: str) -> list: self.s3_provider.delete_files(self.artifact_bucket, object_keys_to_delete) if errors: self._store_sfn_response("report/errors", collection_version_id, errors) - elif extra_info["can_publish"].lower() == "true": - self.business_logic.publish_collection_version(collection_version.version_id) return errors def _store_sfn_response(self, directory: str, file_name: str, response: Dict[str, str]): @@ -379,17 +359,14 @@ def migrate(self, step_name) -> bool: self.logger.info(f"Starting {step_name}", extra={"step": step_name}) if step_name == "gather_collections": gather_collections = self.error_wrapper(self.gather_collections, "gather_collections") - auto_publish = os.environ["AUTO_PUBLISH"].lower() == "true" - response, _ = gather_collections(auto_publish) + response, _ = gather_collections() elif step_name == "collection_migrate": collection_id = os.environ["COLLECTION_ID"] collection_version_id = os.environ["COLLECTION_VERSION_ID"] - can_publish = os.environ["CAN_PUBLISH"].lower() == "true" collection_migrate = self.error_wrapper(self.collection_migrate, collection_id) response, _, _ = collection_migrate( collection_id=collection_id, collection_version_id=collection_version_id, - can_publish=can_publish, ) elif step_name == "dataset_migrate": 
collection_version_id = os.environ["COLLECTION_VERSION_ID"] @@ -403,10 +380,10 @@ def migrate(self, step_name) -> bool: dataset_id=dataset_id, dataset_version_id=dataset_version_id, ) - elif step_name == "collection_publish": + elif step_name == "collection_cleanup": collection_version_id = os.environ["COLLECTION_VERSION_ID"] - publish_and_cleanup = self.error_wrapper(self.publish_and_cleanup, collection_version_id) - response = publish_and_cleanup(collection_version_id=collection_version_id) + log_errors_and_cleanup = self.error_wrapper(self.log_errors_and_cleanup, collection_version_id) + response = log_errors_and_cleanup(collection_version_id=collection_version_id) elif step_name == "report": response = self.report() self.logger.info("output", extra={"response": response}) diff --git a/tests/unit/processing/schema_migration/test_collection_migrate.py b/tests/unit/processing/schema_migration/test_collection_migrate.py index ac6d7d3be21d7..ed45ad3e5da08 100644 --- a/tests/unit/processing/schema_migration/test_collection_migrate.py +++ b/tests/unit/processing/schema_migration/test_collection_migrate.py @@ -4,7 +4,7 @@ class TestCollectionMigrate: - def test_can_publish_true(self, schema_migrate_and_collections): + def test_migrate_published_collection(self, schema_migrate_and_collections): schema_migrate, collections = schema_migrate_and_collections schema_migrate._store_sfn_response = Mock(wraps=schema_migrate._store_sfn_response) schema_migrate.schema_version = "0.0.0" @@ -20,18 +20,19 @@ def test_can_publish_true(self, schema_migrate_and_collections): } for dataset in published.datasets ] - response, response_for_publish_and_cleanup, response_for_span_datasets = schema_migrate.collection_migrate( - published.collection_id.id, published.version_id.id, True + response, response_for_log_errors_and_cleanup, response_for_span_datasets = schema_migrate.collection_migrate( + published.collection_id.id, + published.version_id.id, ) schema_migrate._store_sfn_response.assert_any_call( - "publish_and_cleanup", published.collection_id.id, response_for_publish_and_cleanup + "log_errors_and_cleanup", published.collection_id.id, response_for_log_errors_and_cleanup ) schema_migrate._store_sfn_response.assert_any_call( "span_datasets", published.collection_id.id, response_for_span_datasets ) - assert response_for_publish_and_cleanup["collection_version_id"] == collection_version_id.id + assert response_for_log_errors_and_cleanup["collection_version_id"] == collection_version_id.id assert ( - response_for_publish_and_cleanup["collection_url"] + response_for_log_errors_and_cleanup["collection_url"] == f"https://collections_domain/collections/{published.collection_id.id}" ) assert "key_name" in response @@ -40,7 +41,7 @@ def test_can_publish_true(self, schema_migrate_and_collections): assert response_for_span_datasets[i].pop("collection_id") == published.collection_id.id assert response_for_span_datasets[i] == datasets[i] - def test_can_publish_false(self, schema_migrate_and_collections): + def test_migrate_private_collection(self, schema_migrate_and_collections): schema_migrate, collections = schema_migrate_and_collections schema_migrate._store_sfn_response = Mock(wraps=schema_migrate._store_sfn_response) schema_migrate.schema_version = "0.0.0" @@ -54,20 +55,21 @@ def test_can_publish_false(self, schema_migrate_and_collections): } for dataset in private.datasets ] - response, response_for_publish_and_cleanup, response_for_span_datasets = schema_migrate.collection_migrate( - private.collection_id.id, 
private.version_id.id, False + response, response_for_log_errors_and_cleanup, response_for_span_datasets = schema_migrate.collection_migrate( + private.collection_id.id, + private.version_id.id, ) schema_migrate._store_sfn_response.assert_any_call( - "publish_and_cleanup", private.collection_id.id, response_for_publish_and_cleanup + "log_errors_and_cleanup", private.collection_id.id, response_for_log_errors_and_cleanup ) schema_migrate._store_sfn_response.assert_any_call( "span_datasets", private.collection_id.id, response_for_span_datasets ) - # verify response_for_publish_and_cleanup - assert response_for_publish_and_cleanup["collection_version_id"] == private.version_id.id + # verify response_for_log_errors_and_cleanup + assert response_for_log_errors_and_cleanup["collection_version_id"] == private.version_id.id assert ( - response_for_publish_and_cleanup["collection_url"] + response_for_log_errors_and_cleanup["collection_url"] == f"https://collections_domain/collections/{private.collection_id.id}" ) @@ -82,50 +84,24 @@ def test_can_publish_false(self, schema_migrate_and_collections): assert response_for_span_datasets[i].pop("collection_id") == private.collection_id.id assert response_for_span_datasets[i] == datasets[i] - def test_can_publish_false_and_no_datasets(self, schema_migrate_and_collections): + def test_filter_schema_version(self, schema_migrate_and_collections): schema_migrate, collections = schema_migrate_and_collections schema_migrate._store_sfn_response = Mock(wraps=schema_migrate._store_sfn_response) - schema_migrate.schema_version = "0.0.0" published = collections["published"][0] - published.datasets = [] schema_migrate.business_logic.create_collection_version.return_value = Mock(version_id=CollectionVersionId()) - response, response_for_publish_and_cleanup, response_for_span_datasets = schema_migrate.collection_migrate( - published.collection_id.id, published.version_id.id, False + response, response_for_log_errors_and_cleanup, response_for_span_datasets = schema_migrate.collection_migrate( + published.collection_id.id, + published.version_id.id, ) schema_migrate._store_sfn_response.assert_called_once_with( - "publish_and_cleanup", published.collection_id.id, response_for_publish_and_cleanup - ) - - # verify response_for_publish_and_cleanup - assert response_for_publish_and_cleanup["collection_version_id"] == published.version_id.id - assert ( - response_for_publish_and_cleanup["collection_url"] - == f"https://collections_domain/collections/{published.collection_id.id}" + "log_errors_and_cleanup", published.collection_id.id, response_for_log_errors_and_cleanup ) - # verify response_for_span_datasets - assert not response_for_span_datasets - - # verify response - assert "key_name" not in response - assert response["collection_version_id"] == published.version_id.id - assert response["execution_id"] == "test-execution-arn" - def test_can_publish_true_and_filtered_schema_version(self, schema_migrate_and_collections): - schema_migrate, collections = schema_migrate_and_collections - schema_migrate._store_sfn_response = Mock(wraps=schema_migrate._store_sfn_response) - published = collections["published"][0] - schema_migrate.business_logic.create_collection_version.return_value = Mock(version_id=CollectionVersionId()) - response, response_for_publish_and_cleanup, response_for_span_datasets = schema_migrate.collection_migrate( - published.collection_id.id, published.version_id.id, False - ) - schema_migrate._store_sfn_response.assert_called_once_with( - "publish_and_cleanup", 
published.collection_id.id, response_for_publish_and_cleanup - ) + # verify response_for_log_errors_and_cleanup - # verify response_for_publish_and_cleanup - assert response_for_publish_and_cleanup["collection_version_id"] == published.version_id.id + assert response_for_log_errors_and_cleanup["collection_version_id"] == published.version_id.id assert ( - response_for_publish_and_cleanup["collection_url"] + response_for_log_errors_and_cleanup["collection_url"] == f"https://collections_domain/collections/{published.collection_id.id}" ) @@ -143,17 +119,18 @@ def test_no_datasets(self, schema_migrate_and_collections): published = collections["published"][0] published.datasets = [] schema_migrate.business_logic.create_collection_version.return_value = Mock(version_id=CollectionVersionId()) - response, response_for_publish_and_cleanup, response_for_span_datasets = schema_migrate.collection_migrate( - published.collection_id.id, published.version_id.id, False + response, response_for_log_errors_and_cleanup, response_for_span_datasets = schema_migrate.collection_migrate( + published.collection_id.id, + published.version_id.id, ) schema_migrate._store_sfn_response.assert_called_once_with( - "publish_and_cleanup", published.collection_id.id, response_for_publish_and_cleanup + "log_errors_and_cleanup", published.collection_id.id, response_for_log_errors_and_cleanup ) - # verify response_for_publish_and_cleanup - assert response_for_publish_and_cleanup["collection_version_id"] == published.version_id.id + # verify response_for_log_errors_and_cleanup + assert response_for_log_errors_and_cleanup["collection_version_id"] == published.version_id.id assert ( - response_for_publish_and_cleanup["collection_url"] + response_for_log_errors_and_cleanup["collection_url"] == f"https://collections_domain/collections/{published.collection_id.id}" ) @@ -175,7 +152,7 @@ def test_create_migration_revision__private(self, schema_migrate_and_collections ) # only call create_collection_version if the collection is published - schema_migrate.collection_migrate(private.collection_id.id, private.version_id.id, False) + schema_migrate.collection_migrate(private.collection_id.id, private.version_id.id) schema_migrate.business_logic.create_collection_version.assert_not_called() def test_create_migration_revision__published_with_revision(self, schema_migrate_and_collections): @@ -187,10 +164,10 @@ def test_create_migration_revision__published_with_revision(self, schema_migrate return_value=Mock(version_id=CollectionVersionId()) ) - schema_migrate.collection_migrate(revision.collection_id.id, revision.version_id.id, False) + schema_migrate.collection_migrate(revision.collection_id.id, revision.version_id.id) schema_migrate.business_logic.create_collection_version.assert_not_called() - schema_migrate.collection_migrate(published.collection_id.id, published.version_id.id, False) + schema_migrate.collection_migrate(published.collection_id.id, published.version_id.id) schema_migrate.business_logic.create_collection_version.assert_called_once() def test_create_migration_revision__published_no_revision(self, schema_migrate_and_collections): @@ -203,5 +180,5 @@ def test_create_migration_revision__published_no_revision(self, schema_migrate_a return_value=Mock(version_id=CollectionVersionId()) ) - schema_migrate.collection_migrate(published.collection_id.id, published.version_id.id, False) + schema_migrate.collection_migrate(published.collection_id.id, published.version_id.id) 
schema_migrate.business_logic.create_collection_version.assert_called_once() diff --git a/tests/unit/processing/schema_migration/test_gather_collections.py b/tests/unit/processing/schema_migration/test_gather_collections.py index c270b8f62a2f0..887be2fb8f7b5 100644 --- a/tests/unit/processing/schema_migration/test_gather_collections.py +++ b/tests/unit/processing/schema_migration/test_gather_collections.py @@ -5,16 +5,14 @@ def test_with_revision(self, schema_migrate_and_collections): published, revision = collections["revision"] # get_collections is called twice schema_migrate.business_logic.get_collections.side_effect = [[published], [revision]] - _, response = schema_migrate.gather_collections(auto_publish=True) + _, response = schema_migrate.gather_collections() assert len(response) == 2 assert { - "can_publish": "True", "collection_id": published.collection_id.id, "collection_version_id": published.version_id.id, "execution_id": "test-execution-arn", } in response assert { - "can_publish": "False", "collection_id": revision.collection_id.id, "collection_version_id": revision.version_id.id, "execution_id": "test-execution-arn", @@ -25,16 +23,14 @@ def test_with_migration_revision(self, schema_migrate_and_collections): published, migration_revision = collections["migration_revision"] # get_collections is called twice schema_migrate.business_logic.get_collections.side_effect = [[published], [migration_revision]] - _, response = schema_migrate.gather_collections(auto_publish=True) + _, response = schema_migrate.gather_collections() assert len(response) == 1 assert { - "can_publish": "True", "collection_id": published.collection_id.id, "collection_version_id": published.version_id.id, "execution_id": "test-execution-arn", } not in response assert { - "can_publish": "True", "collection_id": migration_revision.collection_id.id, "collection_version_id": migration_revision.version_id.id, "execution_id": "test-execution-arn", @@ -45,9 +41,8 @@ def test_with_published(self, schema_migrate_and_collections): published = collections["published"] # get_collections is called twice schema_migrate.business_logic.get_collections.side_effect = [published, []] - _, response = schema_migrate.gather_collections(auto_publish=True) + _, response = schema_migrate.gather_collections() assert { - "can_publish": "True", "collection_id": published[0].collection_id.id, "collection_version_id": published[0].version_id.id, "execution_id": "test-execution-arn", @@ -59,16 +54,15 @@ def test_with_private(self, schema_migrate_and_collections): private = collections["private"] # get_collections is called twice schema_migrate.business_logic.get_collections.side_effect = [[], private] - _, response = schema_migrate.gather_collections(auto_publish=True) + _, response = schema_migrate.gather_collections() assert { - "can_publish": "False", "collection_id": private[0].collection_id.id, "collection_version_id": private[0].version_id.id, "execution_id": "test-execution-arn", } in response assert len(response) == 1 - def test_with_auto_publish_false(self, schema_migrate_and_collections): + def test_with_mixed_collection_types(self, schema_migrate_and_collections): schema_migrate, collections = schema_migrate_and_collections published_no_revision = collections["published"][0] private = collections["private"][0] @@ -79,35 +73,30 @@ def test_with_auto_publish_false(self, schema_migrate_and_collections): [published_no_revision, published_with_non_migration_revision, published_with_migration_revision], [private, revision, 
migration_revision], ] - _, response = schema_migrate.gather_collections(auto_publish=False) + _, response = schema_migrate.gather_collections() assert len(response) == 5 for obj in [ { - "can_publish": "False", "collection_id": published_no_revision.collection_id.id, "collection_version_id": published_no_revision.version_id.id, "execution_id": "test-execution-arn", }, { - "can_publish": "False", "collection_id": private.collection_id.id, "collection_version_id": private.version_id.id, "execution_id": "test-execution-arn", }, { - "can_publish": "False", "collection_id": revision.collection_id.id, "collection_version_id": revision.version_id.id, "execution_id": "test-execution-arn", }, { - "can_publish": "False", "collection_id": published_with_non_migration_revision.collection_id.id, "collection_version_id": published_with_non_migration_revision.version_id.id, "execution_id": "test-execution-arn", }, { - "can_publish": "False", "collection_id": migration_revision.collection_id.id, "collection_version_id": migration_revision.version_id.id, "execution_id": "test-execution-arn", diff --git a/tests/unit/processing/schema_migration/test_publish_and_cleanup.py b/tests/unit/processing/schema_migration/test_log_errors_and_cleanup.py similarity index 71% rename from tests/unit/processing/schema_migration/test_publish_and_cleanup.py rename to tests/unit/processing/schema_migration/test_log_errors_and_cleanup.py index 071b4b0aa4a49..89dd66e8e7761 100644 --- a/tests/unit/processing/schema_migration/test_publish_and_cleanup.py +++ b/tests/unit/processing/schema_migration/test_log_errors_and_cleanup.py @@ -6,10 +6,9 @@ from tests.unit.processing.schema_migration.conftest import make_mock_collection_version, make_mock_dataset_version -def factory_download_file(can_publish): +def factory_download_file(): def download_file(bucket, key_name, local_path): contents = { - "can_publish": str(can_publish), "datasets": [ { "dataset_id": "dataset_id_1", @@ -24,7 +23,7 @@ def download_file(bucket, key_name, local_path): "dataset_version_id": "prev_non_migrated_dataset_version_id", }, ], - # these datasets populate the processed_dataset variable in the publish_and_cleanup function + # these datasets populate the processed_dataset variable in the log_errors_and_cleanup function } with open(local_path, "w") as f: f.write(json.dumps(contents)) @@ -41,9 +40,9 @@ def mock_json_dump(response, f, **kwargs): @patch("backend.layers.processing.schema_migration.json.dump", side_effect=mock_json_dump) -class TestPublishAndCleanup: +class TestLogErrorsAndCleanup: def test_OK(self, mock_json, schema_migrate): - schema_migrate.business_logic.s3_provider.download_file = factory_download_file(True) + schema_migrate.business_logic.s3_provider.download_file = factory_download_file() schema_migrate.business_logic.s3_provider.delete_files = Mock() datasets = [ make_mock_dataset_version( @@ -55,11 +54,10 @@ def test_OK(self, mock_json, schema_migrate): collection_version = make_mock_collection_version(datasets) schema_migrate.business_logic.get_collection_version.return_value = collection_version - errors = schema_migrate.publish_and_cleanup(collection_version.version_id.id) + errors = schema_migrate.log_errors_and_cleanup(collection_version.version_id.id) assert errors == [] - schema_migrate.business_logic.publish_collection_version.assert_called_once_with(collection_version.version_id) schema_migrate.s3_provider.delete_files.assert_any_call( - "artifact-bucket", 
["schema_migration/test-execution-arn/publish_and_cleanup/collection_id.json"] + "artifact-bucket", ["schema_migration/test-execution-arn/log_errors_and_cleanup/collection_id.json"] ) schema_migrate.s3_provider.delete_files.assert_any_call( "artifact-bucket", @@ -67,7 +65,7 @@ def test_OK(self, mock_json, schema_migrate): ) def test_with_errors(self, mock_json, schema_migrate): - schema_migrate.business_logic.s3_provider.download_file = factory_download_file(True) + schema_migrate.business_logic.s3_provider.download_file = factory_download_file() schema_migrate.business_logic.s3_provider.delete_files = Mock() failed_dataset = make_mock_dataset_version( dataset_id="dataset_id_2", @@ -87,7 +85,7 @@ def test_with_errors(self, mock_json, schema_migrate): collection_version = make_mock_collection_version(datasets) schema_migrate.business_logic.get_collection_version.return_value = collection_version - errors = schema_migrate.publish_and_cleanup(collection_version.version_id.id) + errors = schema_migrate.log_errors_and_cleanup(collection_version.version_id.id) assert len(errors) == 2 assert { "message": failed_dataset.status.validation_message, @@ -107,9 +105,8 @@ def test_with_errors(self, mock_json, schema_migrate): "dataset_id": non_migrated_dataset.dataset_id.id, "rollback": False, } in errors - schema_migrate.business_logic.publish_collection_version.assert_not_called() schema_migrate.s3_provider.delete_files.assert_any_call( - "artifact-bucket", ["schema_migration/test-execution-arn/publish_and_cleanup/collection_id.json"] + "artifact-bucket", ["schema_migration/test-execution-arn/log_errors_and_cleanup/collection_id.json"] ) schema_migrate.s3_provider.delete_files.assert_any_call( "artifact-bucket", @@ -119,45 +116,19 @@ def test_with_errors(self, mock_json, schema_migrate): ], ) - def test_can_not_publish(self, mock_json, schema_migrate): - schema_migrate.business_logic.s3_provider.download_file = factory_download_file(False) - schema_migrate.business_logic.s3_provider.delete_files = Mock() - dataset_status = dict(processing_status=DatasetProcessingStatus.SUCCESS) - metadata = dict(schema_version="1.0.0") - collection_version = make_mock_collection_version( - [ - make_mock_dataset_version( - dataset_id="dataset_id_1", - version_id="new_successful_dataset_version_id", - status=dataset_status, - metadata=metadata, - ) - ] - ) - schema_migrate.business_logic.get_collection_version.return_value = collection_version - errors = schema_migrate.publish_and_cleanup(collection_version.version_id.id) - assert errors == [] - schema_migrate.business_logic.publish_collection_version.assert_not_called() - schema_migrate.s3_provider.delete_files.assert_any_call( - "artifact-bucket", ["schema_migration/test-execution-arn/publish_and_cleanup/collection_id.json"] - ) - schema_migrate.s3_provider.delete_files.assert_any_call( - "artifact-bucket", ["prev_successful_dataset_version_id/migrated.h5ad"] - ) - def test_skip_unprocessed_datasets(self, mock_json, schema_migrate): """ - Test that datasets that do not appear in the processed_datasets variable in publish_and_cleanup are skipped + Test that datasets that do not appear in the processed_datasets variable in log_errors_and_cleanup are skipped """ - schema_migrate.business_logic.s3_provider.download_file = factory_download_file(False) + schema_migrate.business_logic.s3_provider.download_file = factory_download_file() schema_migrate.business_logic.s3_provider.delete_files = Mock() collection_version = 
make_mock_collection_version([make_mock_dataset_version()]) schema_migrate.business_logic.get_collection_version.return_value = collection_version schema_migrate.check_dataset_is_latest_schema_version = Mock(return_value=True) - errors = schema_migrate.publish_and_cleanup(collection_version.version_id.id) + errors = schema_migrate.log_errors_and_cleanup(collection_version.version_id.id) assert errors == [] schema_migrate.check_dataset_is_latest_schema_version.assert_not_called() schema_migrate.s3_provider.delete_files.assert_any_call( - "artifact-bucket", ["schema_migration/test-execution-arn/publish_and_cleanup/collection_id.json"] + "artifact-bucket", ["schema_migration/test-execution-arn/log_errors_and_cleanup/collection_id.json"] ) schema_migrate.s3_provider.delete_files.assert_any_call("artifact-bucket", []) From 5afa5396d9ac97e2f16813466018a4c0480f1146 Mon Sep 17 00:00:00 2001 From: Trent Smith <1429913+Bento007@users.noreply.github.com> Date: Mon, 17 Jun 2024 14:56:39 -0700 Subject: [PATCH 3/6] fix: optimize database queries (#7191) --- backend/api_server/db.py | 2 + backend/common/utils/db_session.py | 1 + backend/common/utils/timer.py | 16 ++ .../api/v1/curation/collections/actions.py | 4 +- .../api/v1/curation/collections/common.py | 16 +- backend/layers/persistence/persistence.py | 165 ++++++++++-------- 6 files changed, 131 insertions(+), 73 deletions(-) create mode 100644 backend/common/utils/timer.py diff --git a/backend/api_server/db.py b/backend/api_server/db.py index 50ae640557af7..a15e136d36379 100644 --- a/backend/api_server/db.py +++ b/backend/api_server/db.py @@ -4,6 +4,8 @@ from backend.common.utils.db_session import db_session_manager +# TODO remove from code base + def dbconnect(func): @wraps(func) diff --git a/backend/common/utils/db_session.py b/backend/common/utils/db_session.py index 95b7fc4159f1d..b5025d2501101 100644 --- a/backend/common/utils/db_session.py +++ b/backend/common/utils/db_session.py @@ -1,3 +1,4 @@ +# TODO remove from code base import logging from contextlib import contextmanager diff --git a/backend/common/utils/timer.py b/backend/common/utils/timer.py new file mode 100644 index 0000000000000..f1f82a34373b0 --- /dev/null +++ b/backend/common/utils/timer.py @@ -0,0 +1,16 @@ +import logging +import time +from contextlib import contextmanager + +logging.basicConfig(level=logging.INFO) + + +@contextmanager +def log_time_taken(description: str = "Code block"): + start_time = time.time() + try: + yield + finally: + end_time = time.time() + elapsed_time = end_time - start_time + logging.info(dict(type="METRIC", details=dict(description=description, time=elapsed_time, unit="seconds"))) diff --git a/backend/curation/api/v1/curation/collections/actions.py b/backend/curation/api/v1/curation/collections/actions.py index 0554dea51e39d..f52e1d8c07c46 100644 --- a/backend/curation/api/v1/curation/collections/actions.py +++ b/backend/curation/api/v1/curation/collections/actions.py @@ -1,3 +1,5 @@ +import logging + from flask import jsonify, make_response import backend.common.doi as doi @@ -37,7 +39,7 @@ def get(visibility: str, token_info: dict, curator: str = None): else: filters["curator_name"] = curator - print(filters) + logging.info(filters) resp_collections = [] for collection_version in get_business_logic().get_collections(CollectionQueryFilter(**filters)): diff --git a/backend/curation/api/v1/curation/collections/common.py b/backend/curation/api/v1/curation/collections/common.py index f721acf53a7ef..852ab329a5208 100644 --- 
a/backend/curation/api/v1/curation/collections/common.py +++ b/backend/curation/api/v1/curation/collections/common.py @@ -195,11 +195,20 @@ def reshape_datasets_for_curation_api( as_version: bool = False, is_published: bool = False, ) -> List[dict]: + business_logic = get_business_logic() active_datasets = [] + dataset_version_ids = [] + dataset_versions = [] for dv in datasets: - dataset_version = get_business_logic().get_dataset_version(dv) if isinstance(dv, DatasetVersionId) else dv + if isinstance(dv, DatasetVersion): + dataset_versions.append(dv) + else: + dataset_version_ids.append(dv) + if dataset_version_ids: + dataset_versions.extend(business_logic.database_provider.get_dataset_versions_by_id(dataset_version_ids)) + for dv in dataset_versions: reshaped_dataset = reshape_dataset_for_curation_api( - dataset_version, use_canonical_url, preview, as_canonical=not as_version, is_published=is_published + dv, use_canonical_url, preview, as_canonical=not as_version, is_published=is_published ) active_datasets.append(reshaped_dataset) return active_datasets @@ -275,7 +284,8 @@ def reshape_dataset_for_curation_datasets_index_api( """ Create the response shape for the curation datasets index API response. Handles shape for both public and private requests. - :param visibility: the requested visibility of the datasets to be included in the dataset index response; either PUBLIC or PRIVATE. + :param visibility: the requested visibility of the datasets to be included in the dataset index response; either + PUBLIC or PRIVATE. :param collection_version: the collection version of the dataset to be included in the API response. :param dataset_version: a dataset version to be included in the API response. :return: A dictionary shaped for inclusion in the datasets index API response. 
diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 90c5e5e5d5ced..d9e6393cbfd62 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -6,7 +6,8 @@ from datetime import datetime from typing import Any, Iterable, List, Optional, Tuple, Union -from sqlalchemy import create_engine, delete +from server_timing import Timing as ServerTiming +from sqlalchemy import create_engine, delete, update from sqlalchemy.exc import ProgrammingError, SQLAlchemyError from sqlalchemy.orm import Session, sessionmaker from tenacity import retry @@ -14,6 +15,7 @@ from tenacity.wait import wait_fixed from backend.common.corpora_config import CorporaDbConfig +from backend.common.utils.timer import log_time_taken from backend.layers.business.exceptions import CollectionIsPublishedException, DatasetIsPublishedException from backend.layers.common.entities import ( CanonicalCollection, @@ -281,16 +283,10 @@ def get_dataset_versions_by_id( canonical_ids.append(version.dataset_id) artifact_ids.extend(version.artifacts) - if get_tombstoned: - canonical_dataset_query = session.query(DatasetTable).filter(DatasetTable.id.in_(canonical_ids)) - else: - canonical_dataset_query = ( - session.query(DatasetTable) - .filter(DatasetTable.id.in_(canonical_ids)) - .filter(DatasetTable.tombstone.is_(False)) - ) + canonical_dataset_query = session.query(DatasetTable).filter(DatasetTable.id.in_(canonical_ids)) + if not get_tombstoned: + canonical_dataset_query = canonical_dataset_query.filter(DatasetTable.tombstone.is_(False)) canonical_datasets = canonical_dataset_query.all() - canonical_map = {canonical_dataset.id: canonical_dataset for canonical_dataset in canonical_datasets} artifacts = session.query(DatasetArtifactTable).filter(DatasetArtifactTable.id.in_(artifact_ids)).all() @@ -377,12 +373,15 @@ def get_all_versions_for_collection( with self._manage_session() as session: version_rows = session.query(CollectionVersionTable).filter_by(collection_id=collection_id.id).all() canonical_collection = self.get_canonical_collection(collection_id) - versions = list() - for i in range(len(version_rows)): - datasets = self.get_dataset_versions_by_id( - [DatasetVersionId(str(id)) for id in version_rows[i].datasets], get_tombstoned=get_tombstoned - ) - version = self._row_to_collection_version_with_datasets(version_rows[i], canonical_collection, datasets) + versions = [] + dataset_version_ids = [DatasetVersionId(str(_id)) for vr in version_rows for _id in vr.datasets] + datasets = { + str(dv.version_id): dv + for dv in self.get_dataset_versions_by_id(dataset_version_ids, get_tombstoned=get_tombstoned) + } + for row in version_rows: + ds = [datasets[str(id)] for id in row.datasets] + version = self._row_to_collection_version_with_datasets(row, canonical_collection, ds) versions.append(version) return versions @@ -397,12 +396,15 @@ def get_unpublished_versions_for_collection( session.query(CollectionVersionTable).filter_by(collection_id=collection_id.id, published_at=None).all() ) canonical_collection = self.get_canonical_collection(collection_id) - versions = list() - for i in range(len(version_rows)): - datasets = self.get_dataset_versions_by_id( - [DatasetVersionId(str(id)) for id in version_rows[i].datasets], get_tombstoned=get_tombstoned - ) - version = self._row_to_collection_version_with_datasets(version_rows[i], canonical_collection, datasets) + versions = [] + dataset_version_ids = [DatasetVersionId(str(_id)) for vr in version_rows for 
_id in vr.datasets] + datasets = { + str(dv.version_id): dv + for dv in self.get_dataset_versions_by_id(dataset_version_ids, get_tombstoned=get_tombstoned) + } + for row in version_rows: + ds = [datasets[str(id)] for id in row.datasets] + version = self._row_to_collection_version_with_datasets(row, canonical_collection, ds) versions.append(version) return versions @@ -412,33 +414,42 @@ def get_all_collections_versions(self, get_tombstoned: bool = False) -> Iterable TODO: for performance reasons, it might be necessary to add a filtering parameter here. """ with self._manage_session() as session: - versions = session.query(CollectionVersionTable).all() + with ServerTiming.time("get-cv-all"), log_time_taken("get-cv-all"): + versions = session.query(CollectionVersionTable).all() + + with ServerTiming.time("get-cc-by-id"), log_time_taken("get-cc-by-id"): + if get_tombstoned: + all_canonical_collections = session.query(CollectionTable) + else: + all_canonical_collections = session.query(CollectionTable).filter( + CollectionTable.tombstone.isnot(True) + ) - # Create a canonical mapping - if get_tombstoned: - all_canonical_collections = session.query(CollectionTable) - else: - all_canonical_collections = session.query(CollectionTable).filter(CollectionTable.tombstone.isnot(True)) - - all_canonical_map = dict() - for collection_row in all_canonical_collections.all(): - all_canonical_map[str(collection_row.id)] = CanonicalCollection( - CollectionId(str(collection_row.id)), - None if collection_row.version_id is None else CollectionVersionId(str(collection_row.version_id)), - collection_row.originally_published_at, - collection_row.revised_at, - collection_row.tombstone, - ) + all_canonical_map = dict() + for collection_row in all_canonical_collections.all(): + all_canonical_map[str(collection_row.id)] = CanonicalCollection( + CollectionId(str(collection_row.id)), + ( + None + if collection_row.version_id is None + else CollectionVersionId(str(collection_row.version_id)) + ), + collection_row.originally_published_at, + collection_row.revised_at, + collection_row.tombstone, + ) result = [] - all_dataset_tombstones = { - str(dataset.id) - for dataset in session.query(DatasetTable).filter(DatasetTable.tombstone.is_(True)).all() - } - all_dataset_version_mappings = { - str(dataset_version.id): str(dataset_version.dataset_id) - for dataset_version in session.query(DatasetVersionTable).all() - } + with ServerTiming.time("get-cdt-all"), log_time_taken("get-cdt-all"): + all_dataset_tombstones = { + str(dataset.id) + for dataset in session.query(DatasetTable).filter(DatasetTable.tombstone.is_(True)).all() + } + with ServerTiming.time("get-dv-all"), log_time_taken("get-dv-all"): + all_dataset_version_mappings = { + str(dataset_version.id): str(dataset_version.dataset_id) + for dataset_version in session.query(DatasetVersionTable).all() + } for v in versions: include_dataset_version_ids = [] if str(v.collection_id) in all_canonical_map: @@ -461,21 +472,25 @@ def get_all_mapped_collection_versions(self, get_tombstoned: bool = False) -> It will be present in the CollectionVersion.datasets array for active (mapped) Collection versions. 
""" with self._manage_session() as session: - if get_tombstoned: - canonical_collections = session.query(CollectionTable).filter(CollectionTable.version_id.isnot(None)) - else: - canonical_collections = ( - session.query(CollectionTable) - .filter(CollectionTable.version_id.isnot(None)) - .filter_by(tombstone=False) - ) + with ServerTiming.time("get-cc-all"), log_time_taken("get-cc-all"): + if get_tombstoned: + canonical_collections = session.query(CollectionTable).filter( + CollectionTable.version_id.isnot(None) + ) + else: + canonical_collections = ( + session.query(CollectionTable) + .filter(CollectionTable.version_id.isnot(None)) + .filter_by(tombstone=False) + ) - mapped_version_ids = {cc.version_id: cc for cc in canonical_collections.all()} - versions = ( - session.query(CollectionVersionTable) - .filter(CollectionVersionTable.id.in_(mapped_version_ids.keys())) - .all() - ) # noqa + mapped_version_ids = {cc.version_id: cc for cc in canonical_collections.all()} + with ServerTiming.time("get-cv-by-id"), log_time_taken("get-cv-by-id"): + versions = ( + session.query(CollectionVersionTable) + .filter(CollectionVersionTable.id.in_(mapped_version_ids.keys())) + .all() + ) # noqa for version in versions: canonical_row = mapped_version_ids[version.id] @@ -620,11 +635,18 @@ def _delete_dataset_version_and_artifact_rows( """ Delete DatasetVersionTable rows (and their dependent DatasetArtifactTable rows) """ + dataset_version_ids = [] + artifact_ids = [] for d_v_row in dataset_version_rows: - ids = [str(_id) for _id in d_v_row.artifacts] - artifact_delete_statement = delete(DatasetArtifactTable).where(DatasetArtifactTable.id.in_(ids)) - session.execute(artifact_delete_statement) - session.delete(d_v_row) + dataset_version_ids.append(str(d_v_row.id)) + for artifact in d_v_row.artifacts: + artifact_ids.append(str(artifact)) + artifact_delete_statement = delete(DatasetArtifactTable).where(DatasetArtifactTable.id.in_(artifact_ids)) + session.execute(artifact_delete_statement) + dataset_version_delete_statement = delete(DatasetVersionTable).where( + DatasetVersionTable.id.in_(dataset_version_ids) + ) + session.execute(dataset_version_delete_statement) session.flush() def finalize_collection_version( @@ -679,11 +701,16 @@ def finalize_collection_version( # get all dataset versions for the datasets that are being tombstoned dataset_version_ids_to_delete_from_s3 = [] if dataset_ids_to_tombstone: - datasets = session.query(DatasetTable).filter(DatasetTable.id.in_(dataset_ids_to_tombstone)).all() - for dataset in datasets: - dataset.tombstone = True - dataset_all_versions = session.query(DatasetVersionTable).filter_by(dataset_id=dataset.id).all() - dataset_version_ids_to_delete_from_s3.extend([dv.id for dv in dataset_all_versions]) + tombstone_dataset_statement = ( + update(DatasetTable).where(DatasetTable.id.in_(dataset_ids_to_tombstone)).values(tombstone=True) + ) + session.execute(tombstone_dataset_statement) + dataset_all_version_ids = ( + session.query(DatasetVersionTable.id) + .filter(DatasetVersionTable.dataset_id.in_(dataset_ids_to_tombstone)) + .all() + ) + dataset_version_ids_to_delete_from_s3.extend(str(dv_id) for dv_id in dataset_all_version_ids) # update dataset versions for datasets that are not being tombstoned dataset_version_ids = session.query(CollectionVersionTable.datasets).filter_by(id=version_id.id).one()[0] From 71d403677ae2ecd643994eb0172fe1b4444b134f Mon Sep 17 00:00:00 2001 From: Trent Smith <1429913+Bento007@users.noreply.github.com> Date: Mon, 17 Jun 2024 17:04:23 -0700 
Subject: [PATCH 4/6] fix: remove ServerTiming from persistence layer (#7207) merging to fix dev --- backend/layers/persistence/persistence.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index d9e6393cbfd62..02ef4ea03d41a 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -6,7 +6,6 @@ from datetime import datetime from typing import Any, Iterable, List, Optional, Tuple, Union -from server_timing import Timing as ServerTiming from sqlalchemy import create_engine, delete, update from sqlalchemy.exc import ProgrammingError, SQLAlchemyError from sqlalchemy.orm import Session, sessionmaker @@ -414,10 +413,10 @@ def get_all_collections_versions(self, get_tombstoned: bool = False) -> Iterable TODO: for performance reasons, it might be necessary to add a filtering parameter here. """ with self._manage_session() as session: - with ServerTiming.time("get-cv-all"), log_time_taken("get-cv-all"): + with log_time_taken("get-cv-all"): versions = session.query(CollectionVersionTable).all() - with ServerTiming.time("get-cc-by-id"), log_time_taken("get-cc-by-id"): + with log_time_taken("get-cc-by-id"): if get_tombstoned: all_canonical_collections = session.query(CollectionTable) else: @@ -440,12 +439,12 @@ def get_all_collections_versions(self, get_tombstoned: bool = False) -> Iterable ) result = [] - with ServerTiming.time("get-cdt-all"), log_time_taken("get-cdt-all"): + with log_time_taken("get-cdt-all"): all_dataset_tombstones = { str(dataset.id) for dataset in session.query(DatasetTable).filter(DatasetTable.tombstone.is_(True)).all() } - with ServerTiming.time("get-dv-all"), log_time_taken("get-dv-all"): + with log_time_taken("get-dv-all"): all_dataset_version_mappings = { str(dataset_version.id): str(dataset_version.dataset_id) for dataset_version in session.query(DatasetVersionTable).all() @@ -472,7 +471,7 @@ def get_all_mapped_collection_versions(self, get_tombstoned: bool = False) -> It will be present in the CollectionVersion.datasets array for active (mapped) Collection versions. """ with self._manage_session() as session: - with ServerTiming.time("get-cc-all"), log_time_taken("get-cc-all"): + with log_time_taken("get-cc-all"): if get_tombstoned: canonical_collections = session.query(CollectionTable).filter( CollectionTable.version_id.isnot(None) @@ -485,7 +484,7 @@ def get_all_mapped_collection_versions(self, get_tombstoned: bool = False) -> It ) mapped_version_ids = {cc.version_id: cc for cc in canonical_collections.all()} - with ServerTiming.time("get-cv-by-id"), log_time_taken("get-cv-by-id"): + with log_time_taken("get-cv-by-id"): versions = ( session.query(CollectionVersionTable) .filter(CollectionVersionTable.id.in_(mapped_version_ids.keys())) From 766110e11b96c59ef834da9d20ffea2a1d2da68f Mon Sep 17 00:00:00 2001 From: Trent Smith <1429913+Bento007@users.noreply.github.com> Date: Thu, 20 Jun 2024 07:37:58 -0700 Subject: [PATCH 5/6] fix: cloudfront 504 error for long backend requests. 
(#7189) --- .happy/terraform/modules/ecs-stack/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.happy/terraform/modules/ecs-stack/main.tf b/.happy/terraform/modules/ecs-stack/main.tf index 3710ee1f47c91..adcb1277a5331 100644 --- a/.happy/terraform/modules/ecs-stack/main.tf +++ b/.happy/terraform/modules/ecs-stack/main.tf @@ -31,7 +31,7 @@ locals { backend_wmg_workers = var.backend_wmg_workers backend_cmd = ["gunicorn", "--worker-class", "gevent", "--workers", "${local.backend_workers}", "--bind", "0.0.0.0:5000", "backend.api_server.app:app", "--max-requests", "10000", "--timeout", "180", - "--keep-alive", "61", "--log-level", "info"] + "--keep-alive", "100", "--log-level", "info"] backend_de_cmd = ["gunicorn", "--worker-class", "gevent", "--workers", "${local.backend_de_workers}", "--bind", "0.0.0.0:5000", "backend.de.server.app:app", "--max-requests", "10000", "--timeout", "540", "--keep-alive", "61", "--log-level", "info"] From 762aeefb5858535674bc96299988aa58963f6c6d Mon Sep 17 00:00:00 2001 From: Emanuele Bezzi Date: Thu, 20 Jun 2024 09:06:54 -0700 Subject: [PATCH 6/6] fix: add more ignored keys in the Census Models page (#7215) --- .../components/Project/ProjectButtons/index.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/index.tsx b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/index.tsx index 755804501202f..0ce5f2f37b2fe 100644 --- a/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/index.tsx +++ b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/index.tsx @@ -7,7 +7,12 @@ import { ButtonsColumn, ButtonsRow, StyledButton } from "./style"; import DetailItem from "../../DetailItem"; import { DATA_TYPE_TO_EMBEDDING } from ".."; -const IGNORE_DIFFERENT_METADATA_KEYS = ["model_link", "id"]; +const IGNORE_DIFFERENT_METADATA_KEYS = [ + "model_link", + "id", + "relative_uri", + "indexes", +]; const ATTRIBUTE_TO_LABEL: Record = { experiment_name: "Organism", n_cells: "Cells",