
Commit

chore: prod deployment, jun 28th
ebezzi authored Jun 28, 2024
2 parents bd32b39 + 1c82f15 commit cce15b5
Showing 70 changed files with 16,893 additions and 12,590 deletions.
4 changes: 2 additions & 2 deletions backend/common/census_cube/data/query.py
@@ -103,13 +103,13 @@ def expression_summary_and_cell_counts_diffexp(self, criteria: BaseQueryCriteria
)

cell_counts_diffexp_df = self.cell_counts_diffexp_df(criteria)
key = "group_id_simple" if use_simple else "group_id"
cell_counts_group_id_key = "group_id_simple" if use_simple else "group_id"
cube = (
self._snapshot.expression_summary_diffexp_simple_cube
if use_simple
else self._snapshot.expression_summary_diffexp_cube
)
group_ids = cell_counts_diffexp_df[key].unique().tolist()
group_ids = cell_counts_diffexp_df[cell_counts_group_id_key].unique().tolist()
return (
pd.concat(
cube.query(
19 changes: 19 additions & 0 deletions backend/de/api/de-api.yml
@@ -109,6 +109,14 @@ paths:
schema:
type: object
properties:
exclude_overlapping_cells:
type: string
description: "This parameter specifies the method for handling overlapping cells between the two groups."
enum:
- retainBoth
- excludeOne
- excludeTwo
default: excludeTwo
queryGroup1Filters:
type: object
properties:
@@ -156,6 +164,7 @@ paths:
required:
- organism_ontology_term_id
required:
- exclude_overlapping_cells
- queryGroup1Filters
- queryGroup2Filters
responses:
@@ -167,13 +176,23 @@
type: object
required:
- differentialExpressionResults
- successCode
- n_overlap
properties:
snapshot_id:
$ref: "#/components/schemas/de_snapshot_id"
n_overlap:
description: >-
  Number of overlapping cells between the two groups.
type: integer
successCode:
description: >
Indicates the success status of the operation.
0 means success, 1 means one of the groups has 0 cells after filtering out overlapping cells.
type: integer
enum:
- 0
- 1
differentialExpressionResults:
description: >-
Differential expression results
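A minimal client-side sketch of the updated contract (hedged: the host, path, and filter values are placeholders; the field names and the meaning of successCode come from the schema above):

import requests

# Hypothetical host and path; only the body shape below is fixed by the spec.
payload = {
    "exclude_overlapping_cells": "excludeTwo",  # retainBoth | excludeOne | excludeTwo
    "queryGroup1Filters": {"organism_ontology_term_id": "NCBITaxon:9606"},
    "queryGroup2Filters": {"organism_ontology_term_id": "NCBITaxon:9606"},
}
body = requests.post("https://<api-host>/de/v1/differentialExpression", json=payload).json()
if body["successCode"] == 1:
    # One of the groups was left with 0 cells after overlap filtering.
    print(f"{body['n_overlap']} overlapping cells; nothing to compare")
else:
    results = body["differentialExpressionResults"]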
27 changes: 24 additions & 3 deletions backend/de/api/v1.py
@@ -174,6 +174,7 @@ def differentialExpression():

queryGroup1Filters = request["queryGroup1Filters"]
queryGroup2Filters = request["queryGroup2Filters"]
exclude_overlapping_cells = request["exclude_overlapping_cells"]

criteria1 = BaseQueryCriteria(**queryGroup1Filters)
criteria2 = BaseQueryCriteria(**queryGroup2Filters)
@@ -187,18 +188,23 @@
q = CensusCubeQuery(snapshot, cube_query_params=None)

with ServerTiming.time("run differential expression"):
de_results, n_overlap = run_differential_expression(q, criteria1, criteria2)
de_results, n_overlap, successCode = run_differential_expression(
q, criteria1, criteria2, exclude_overlapping_cells
)

return jsonify(
dict(
snapshot_id=snapshot.snapshot_identifier,
differentialExpressionResults=de_results,
n_overlap=n_overlap,
successCode=successCode,
)
)


def run_differential_expression(q: CensusCubeQuery, criteria1, criteria2) -> Tuple[List[Dict], int]:
def run_differential_expression(
q: CensusCubeQuery, criteria1, criteria2, exclude_overlapping_cells
) -> Tuple[List[Dict], int, int]:
"""
Runs differential expression analysis between two sets of criteria.
@@ -212,11 +218,15 @@ def run_differential_expression(q: CensusCubeQuery, criteria1, criteria2) -> Tup
- q: CensusCubeQuery object
- criteria1: The first set of criteria for differential expression analysis.
- criteria2: The second set of criteria for differential expression analysis.
- exclude_overlapping_cells: A string specifying how overlapping cells should be handled.
Returns:
A tuple containing three elements:
- A list of dictionaries, each representing a gene and its differential expression metrics.
- An integer representing the number of overlapping cells between the two groups.
- An integer representing the success code of the differential expression analysis.
0: Success
1: No cells in one or both groups after filtering out overlapping cells
"""

# augment criteria1 and criteria2 with descendants if cell_type_ontology_term_ids is specified
@@ -242,11 +252,22 @@ def run_differential_expression(q: CensusCubeQuery, criteria1, criteria2) -> Tup
for col in cell_counts_logical_dims_exclude_dataset_id
if col in cell_counts1.columns and col in cell_counts2.columns
]

index1 = cell_counts1.set_index(filter_columns).index
index2 = cell_counts2.set_index(filter_columns).index
overlap_filter = index1.isin(index2)
n_overlap = int(cell_counts1[overlap_filter]["n_total_cells"].sum())

es_index1 = es1["group_id"]
es_index2 = es2["group_id"]
if exclude_overlapping_cells == "excludeOne":
es1 = es1[~es_index1.isin(es_index2)]
elif exclude_overlapping_cells == "excludeTwo":
es2 = es2[~es_index2.isin(es_index1)]

if es1.shape[0] == 0 or es2.shape[0] == 0:
return [], n_overlap, 1

es_agg1 = es1.groupby("gene_ontology_term_id").sum(numeric_only=True)
es_agg2 = es2.groupby("gene_ontology_term_id").sum(numeric_only=True)

@@ -286,7 +307,7 @@ def run_differential_expression(q: CensusCubeQuery, criteria1, criteria2) -> Tup
"adjusted_p_value": pval,
}
)
return statistics, n_overlap
return statistics, n_overlap, 0


def _get_cell_counts_for_query(q: CensusCubeQuery, criteria: BaseQueryCriteria) -> pd.DataFrame:
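A toy illustration of the overlap handling introduced above (hand-made frames, not the production data model): group 3 appears on both sides, so each option resolves the overlap differently.

import pandas as pd

es1 = pd.DataFrame({"group_id": [1, 2, 3], "gene_ontology_term_id": ["g1", "g2", "g3"]})
es2 = pd.DataFrame({"group_id": [3, 4], "gene_ontology_term_id": ["g3", "g4"]})

exclude_overlapping_cells = "excludeTwo"
if exclude_overlapping_cells == "excludeOne":
    es1 = es1[~es1["group_id"].isin(es2["group_id"])]  # drops group 3 from side 1
elif exclude_overlapping_cells == "excludeTwo":
    es2 = es2[~es2["group_id"].isin(es1["group_id"])]  # drops group 3 from side 2
# "retainBoth" leaves both frames intact, matching the pre-change behavior.

print(es2)  # only group 4 remains under excludeTwo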
49 changes: 48 additions & 1 deletion backend/layers/business/business.py
@@ -43,7 +43,7 @@
NoPreviousDatasetVersionException,
)
from backend.layers.common import validation
from backend.layers.common.cleanup import sanitize
from backend.layers.common.cleanup import sanitize, sanitize_dataset_artifact_metadata_update
from backend.layers.common.entities import (
CanonicalCollection,
CollectionId,
@@ -463,6 +463,23 @@ def _assert_collection_version_unpublished(
raise CollectionIsPublishedException([f"Collection version {collection_version_id.id} is published"])
return collection_version

def _assert_dataset_version_processing_status(
self, dataset_version_id: DatasetVersionId, expected_status: DatasetProcessingStatus
) -> DatasetVersion:
"""
Ensures a dataset version is in the expected processing status.
:param dataset_version_id: The dataset version to check the processing status of.
:param expected_status: The expected processing status of the dataset version.
:return: The dataset version if it is in the expected processing status.
"""
dataset = self.database_provider.get_dataset_version(dataset_version_id)
if dataset.status.processing_status != expected_status:
raise DatasetInWrongStatusException(
f"Dataset {dataset_version_id.id} processing status must be {expected_status.name} but is {dataset.status.processing_status}."
)
return dataset

def create_empty_dataset(self, collection_version_id: CollectionVersionId) -> DatasetVersion:
"""
Creates an empty dataset that can be later used for ingestion
@@ -625,6 +642,36 @@ def set_dataset_metadata(self, dataset_version_id: DatasetVersionId, metadata: D
"""
self.database_provider.set_dataset_metadata(dataset_version_id, metadata)

def update_dataset_artifact_metadata(
self,
collection_version_id: CollectionVersionId,
dataset_version_id: DatasetVersionId,
metadata_update: DatasetArtifactMetadataUpdate,
) -> None:
"""
Validates dataset artifact metadata update and triggers corresponding updates. Currently only supports
updating dataset title.
:param collection_version_id: Collection of dataset to update.
:param dataset_version_id: Version ID of dataset to update.
:param metadata_update: Metadata update to apply.
"""
# Format submitted update values.
sanitize_dataset_artifact_metadata_update(metadata_update)

# Confirm update values are valid.
validation.verify_dataset_artifact_metadata_update(metadata_update)

# Dataset can only be updated if corresponding collection is unpublished.
self._assert_collection_version_unpublished(collection_version_id)

# Dataset can only be updated if its processing status is SUCCESS.
self._assert_dataset_version_processing_status(dataset_version_id, DatasetProcessingStatus.SUCCESS)

# Trigger update of dataset artifact.
collection_version = self.get_collection_version(collection_version_id)
self.trigger_dataset_artifact_update(collection_version, metadata_update, dataset_version_id)

def get_all_mapped_datasets(self) -> List[DatasetVersion]:
"""
Retrieves all the datasets from the database that belong to a published collection
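A hedged sketch of a caller driving the new method (the business_logic handle, both ID values, and the DatasetArtifactMetadataUpdate constructor signature are assumptions; the exception behavior follows the asserts above):

from backend.layers.common.entities import DatasetArtifactMetadataUpdate

update = DatasetArtifactMetadataUpdate(title="  Renamed dataset  ")  # sanitize strips the padding
business_logic.update_dataset_artifact_metadata(  # `business_logic` stands in for the real instance
    collection_version_id,  # hypothetical CollectionVersionId
    dataset_version_id,     # hypothetical DatasetVersionId
    update,
)
# Raises CollectionIsPublishedException if the collection version is published,
# DatasetInWrongStatusException unless processing status is SUCCESS, and
# InvalidMetadataException if the title is blank or contains control characters.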
11 changes: 10 additions & 1 deletion backend/layers/common/cleanup.py
@@ -1,7 +1,7 @@
from typing import Union

from backend.layers.business.entities import CollectionMetadataUpdate
from backend.layers.common.entities import CollectionMetadata
from backend.layers.common.entities import CollectionMetadata, DatasetArtifactMetadataUpdate


def strip_fields(metadata: Union[CollectionMetadata, CollectionMetadataUpdate]):
@@ -28,3 +28,12 @@ def sort_consortia(metadata: Union[CollectionMetadata, CollectionMetadataUpdate]
def sanitize(metadata: Union[CollectionMetadata, CollectionMetadataUpdate]):
strip_fields(metadata)
sort_consortia(metadata)


def sanitize_dataset_artifact_metadata_update(metadata: DatasetArtifactMetadataUpdate):
"""
Dataset title is currently the only field available for update via the FE and the DP and
Discover APIs; strip whitespace from the title.
"""
if metadata.title is not None:
metadata.title = metadata.title.strip()
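
For illustration (assuming DatasetArtifactMetadataUpdate accepts title as a constructor argument):

from backend.layers.common.cleanup import sanitize_dataset_artifact_metadata_update
from backend.layers.common.entities import DatasetArtifactMetadataUpdate

update = DatasetArtifactMetadataUpdate(title="  My Dataset  ")
sanitize_dataset_artifact_metadata_update(update)
assert update.title == "My Dataset"  # surrounding whitespace removed in place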
63 changes: 45 additions & 18 deletions backend/layers/common/validation.py
@@ -4,7 +4,7 @@

from backend.layers.business.entities import CollectionMetadataUpdate
from backend.layers.business.exceptions import InvalidMetadataException
from backend.layers.common.entities import CollectionMetadata, Link
from backend.layers.common.entities import CollectionMetadata, DatasetArtifactMetadataUpdate, Link
from backend.layers.common.regex import CONTROL_CHARS, EMAIL_REGEX

control_char_re = re.compile(CONTROL_CHARS)
@@ -29,6 +29,33 @@
}


def _verify_field(
metadata: Union[CollectionMetadata, CollectionMetadataUpdate, DatasetArtifactMetadataUpdate],
key: str,
check_existence: bool,
errors: list,
) -> None:
"""
Verifies field exists and is not empty.
:param metadata: Metadata update to validate.
:param key: Field to validate in metadata update.
:param check_existence: If True, field must exist in metadata update.
:param errors: List of errors to append to.
"""
value = getattr(metadata, key)
if check_existence and value is None:
# if check_existence is True, the value cannot be None because the field is required
errors.append({"name": key, "reason": "Cannot be empty."})
elif value is not None and not value:
# In any case, if a value is defined, it cannot be falsy (i.e., blank)
errors.append({"name": key, "reason": "Cannot be blank."})
elif value is not None and (key == "name" or key == "title") and control_char_re.search(value):
errors.append({"name": key, "reason": "Invalid characters detected."})
else:
return value


def _verify_collection_metadata_fields(
metadata: Union[CollectionMetadata, CollectionMetadataUpdate], check_existence: bool, errors: list
) -> None:
@@ -39,35 +66,22 @@ def _verify_collection_metadata_fields(
- If the field is an email, it should be in the right format
"""

def check(key):
value = getattr(metadata, key)
if check_existence and value is None:
# if checks_existence is true, value cannot be None since it must be required
errors.append({"name": key, "reason": "Cannot be empty."})
elif value is not None and not value:
# In any case, if a value is defined, it cannot be falsey (aka blank)
errors.append({"name": key, "reason": "Cannot be blank."})
elif value is not None and key == "name" and control_char_re.search(value):
errors.append({"name": key, "reason": "Invalid characters detected."})
else:
return value

def verify_collection_consortia(metadata: Union[CollectionMetadata, CollectionMetadataUpdate], errors: list):
consortia = metadata.consortia
if consortia:
for consortium in consortia:
if consortium not in valid_consortia:
errors.append({"name": "consortia", "reason": "Invalid consortia."})

contact_email = check("contact_email")
contact_email = _verify_field(metadata, "contact_email", check_existence, errors)
if contact_email:
result = EMAIL_REGEX.match(contact_email)
if not result:
errors.append({"name": "contact_email", "reason": "Invalid format."})

check("description")
check("name")
check("contact_name")
_verify_field(metadata, "description", check_existence, errors)
_verify_field(metadata, "name", check_existence, errors)
_verify_field(metadata, "contact_name", check_existence, errors)

verify_collection_consortia(metadata, errors)

@@ -102,3 +116,16 @@ def verify_collection_metadata(metadata: CollectionMetadata, errors: list) -> No
if errors:
raise InvalidMetadataException(errors=errors)
verify_collection_links(metadata.links, errors)


def verify_dataset_artifact_metadata_update(metadata_update: DatasetArtifactMetadataUpdate) -> None:
"""
Verify values of `DatasetArtifactMetadataUpdate` are valid. Currently only the title is available for
update via the FE and the DP and Discover APIs; the title must be specified and must not contain control characters.
:param metadata_update: Metadata update to validate.
"""
errors = []
_verify_field(metadata_update, "title", True, errors)
if errors:
raise InvalidMetadataException(errors=errors)
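
A short sketch of the failure path (the errors attribute on the caught exception is an assumption based on the constructor call above):

from backend.layers.business.exceptions import InvalidMetadataException
from backend.layers.common.entities import DatasetArtifactMetadataUpdate
from backend.layers.common.validation import verify_dataset_artifact_metadata_update

try:
    verify_dataset_artifact_metadata_update(DatasetArtifactMetadataUpdate(title=None))
except InvalidMetadataException as exc:
    print(exc.errors)  # expected: [{"name": "title", "reason": "Cannot be empty."}]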
34 changes: 34 additions & 0 deletions backend/portal/api/portal-api.yml
@@ -382,6 +382,40 @@ paths:
"404":
$ref: "#/components/responses/404"

/v1/collections/{collection_id}/datasets/{dataset_id}:
patch:
tags:
- datasets
summary: Update a dataset's metadata.
security:
- cxguserCookie: []
description: >-
Update a dataset's title and the titles of its corresponding dataset artifacts.
operationId: backend.portal.api.portal_api.update_dataset
parameters:
- $ref: "#/components/parameters/path_collection_id"
- $ref: "#/components/parameters/path_dataset_id"
requestBody:
content:
application/json:
schema:
type: object
properties:
title:
type: string
description: Title of the dataset.
responses:
"202":
$ref: "#/components/responses/202"
"400":
$ref: "#/components/responses/400"
"401":
$ref: "#/components/responses/401"
"403":
$ref: "#/components/responses/403"
"405":
$ref: "#/components/responses/405"

/v1/collections/{collection_id}/order-datasets:
put:
tags:
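A hedged sketch of exercising the new endpoint (host, IDs, and the session-cookie name are placeholders; the path, body shape, and 202 status come from the spec above):

import requests

resp = requests.patch(
    "https://<api-host>/v1/collections/<collection_id>/datasets/<dataset_id>",
    json={"title": "Renamed dataset"},
    cookies={"cxguser": "<session-cookie>"},  # placeholder for the cxguserCookie scheme
)
assert resp.status_code == 202  # update accepted; the artifact rewrite is applied asynchronously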
