Skip to content

Commit

Permalink
deploy: aa74d10
Browse files Browse the repository at this point in the history
  • Loading branch information
DriesSchaumont authored and github-actions[bot] committed Jun 10, 2024
1 parent e56b8a4 commit f6afb13
Show file tree
Hide file tree
Showing 576 changed files with 4,070 additions and 3,689 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# openpipelines 1.0.0-rc6

## BUG FIXES

* `dataflow/concatenate_h5mu`: fix a regression where observations were no longer linked to the correct metadata
after concatenation (PR #807)

# openpipelines 1.0.0-rc5

## BUG FIXES
Expand Down
7 changes: 4 additions & 3 deletions src/base/openpipelinetestutils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ def _get_columns_in_all_modalities(annotation_object, axis_string: str):
if column_name not in global_columns]
extra_cols_to_remove += [column_name for column_name in column_names
if column_name in global_columns]
axis_setter(annotation_object, axis_getter(annotation_object).drop(extra_cols_to_remove,
axis="columns",
inplace=False))
if modality_name:
axis_setter(annotation_object, axis_getter(annotation_object).drop(extra_cols_to_remove,
axis="columns",
inplace=False))

for mod_name in modality_names:
modality = annotation_object.mod[mod_name]
Expand Down
4 changes: 2 additions & 2 deletions src/dataflow/concat/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ functionality:
- path: /resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu
platforms:
- type: docker
image: python:3.10-slim
image: python:3.11-slim
setup:
- type: apt
packages:
Expand All @@ -72,9 +72,9 @@ platforms:
__merge__: [/src/base/requirements/anndata_mudata.yaml, .]
packages:
- pandas~=2.1.1
__merge__: [ /src/base/requirements/python_test_setup.yaml, .]
test_setup:
- type: python
__merge__: [ /src/base/requirements/viashpy.yaml, .]
packages:
- muon
- type: native
Expand Down
4 changes: 2 additions & 2 deletions src/dataflow/concatenate_h5mu/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ functionality:
- path: /resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu
platforms:
- type: docker
image: python:3.10-slim
image: python:3.11-slim
setup:
- type: apt
packages:
Expand All @@ -71,9 +71,9 @@ platforms:
__merge__: [/src/base/requirements/anndata_mudata.yaml, .]
packages:
- pandas~=2.1.1
__merge__: [ /src/base/requirements/python_test_setup.yaml, .]
test_setup:
- type: python
__merge__: [ /src/base/requirements/viashpy.yaml, .]
packages:
- muon
- type: native
Expand Down
17 changes: 7 additions & 10 deletions src/dataflow/concatenate_h5mu/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def any_row_contains_duplicate_values(n_processes: int, frame: pd.DataFrame) ->
is_duplicated = pool.map(nunique, iter(numpy_array))
return any(is_duplicated)

def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index | None) \
def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index) \
-> tuple[dict[str, pd.DataFrame], pd.DataFrame | None, dict[str, pd.core.dtypes.dtypes.Dtype]]:
"""
Merge matrices by combining columns that have the same name.
Expand Down Expand Up @@ -152,7 +152,7 @@ def get_first_non_na_value_vector(df):
def split_conflicts_and_concatenated_columns(n_processes: int,
matrices: dict[str, pd.DataFrame],
column_names: Iterable[str],
align_to: pd.Index | None = None) -> \
align_to: pd.Index) -> \
tuple[dict[str, pd.DataFrame], pd.DataFrame]:
"""
Retrieve columns with the same name from a list of dataframes which are
Expand All @@ -172,8 +172,7 @@ def split_conflicts_and_concatenated_columns(n_processes: int,
join="outer", sort=False)
if any_row_contains_duplicate_values(n_processes, concatenated_columns):
concatenated_columns.columns = columns.keys() # Use the sample id as column name
if align_to is not None:
concatenated_columns = concatenated_columns.reindex(align_to, copy=False)
concatenated_columns = concatenated_columns.reindex(align_to, copy=False)
conflicts[f'conflict_{column_name}'] = concatenated_columns
else:
unique_values = get_first_non_na_value_vector(concatenated_columns)
Expand All @@ -182,8 +181,7 @@ def split_conflicts_and_concatenated_columns(n_processes: int,
return conflicts, pd.DataFrame(index=align_to)
concatenated_matrix = pd.concat(concatenated_matrix, join="outer",
axis=1, sort=False)
if align_to is not None:
concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False)
concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False)
return conflicts, concatenated_matrix

def cast_to_writeable_dtype(result: pd.DataFrame) -> pd.DataFrame:
Expand Down Expand Up @@ -220,8 +218,7 @@ def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnD
for matrix_name in matrices_to_parse:
matrices = {sample_id: getattr(sample, matrix_name) for sample_id, sample in samples.items()}
output_index = getattr(output, matrix_name).index
align_to = output_index if matrix_name == "var" else None
conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, align_to)
conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, output_index)
if concatenated_matrix.empty:
concatenated_matrix.index = output_index
# Write the conflicts to the output
Expand All @@ -238,7 +235,7 @@ def concatenate_modality(n_processes: int, mod: str, input_files: Iterable[str |
other_axis_mode: str, input_ids: tuple[str]) -> anndata.AnnData:

concat_modes = {
"move": None,
"move": "unique",
}
other_axis_mode_to_apply = concat_modes.get(other_axis_mode, other_axis_mode)

Expand All @@ -247,7 +244,7 @@ def concatenate_modality(n_processes: int, mod: str, input_files: Iterable[str |
try:
mod_data[input_id] = mu.read_h5ad(input_file, mod=mod)
except KeyError as e: # Modality does not exist for this sample, skip it
if f"Unable to open object '{mod}' doesn't exist" not in str(e):
if f"Unable to synchronously open object (object '{mod}' doesn't exist)" not in str(e):
raise e
pass
check_observations_unique(mod_data.values())
Expand Down
Loading

0 comments on commit f6afb13

Please sign in to comment.