From dfa13b792bf379a935fdce128ca84c3051af1b5a Mon Sep 17 00:00:00 2001 From: axiomcura Date: Sat, 21 Sep 2024 19:24:27 -0600 Subject: [PATCH 01/14] updated error message --- pycytominer/cyto_utils/features.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pycytominer/cyto_utils/features.py b/pycytominer/cyto_utils/features.py index 1144a9a4..642199e1 100644 --- a/pycytominer/cyto_utils/features.py +++ b/pycytominer/cyto_utils/features.py @@ -115,9 +115,11 @@ def infer_cp_features( population_df.columns.str.startswith("Metadata_") ].tolist() - assert ( # noqa: S101 - len(features) > 0 - ), "No CP features found. Are you sure this dataframe is from CellProfiler?" + if len(features) == 0: + raise ValueError( + "No features found. Pycytominer expects CellProfiler features by default. " + "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + ) return features From 3f8f5644c68a4ab9d4a6ed36e3ec3304c5e0baef Mon Sep 17 00:00:00 2001 From: axiomcura Date: Mon, 23 Sep 2024 14:28:52 -0600 Subject: [PATCH 02/14] updated error messages in tests --- tests/test_cyto_utils/test_feature_infer.py | 8 ++++++-- tests/test_operations/test_correlation_threshold.py | 8 ++++++-- tests/test_operations/test_get_na_columns.py | 8 ++++++-- tests/test_operations/test_variance_threshold.py | 8 ++++++-- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tests/test_cyto_utils/test_feature_infer.py b/tests/test_cyto_utils/test_feature_infer.py index df839858..bb951986 100644 --- a/tests/test_cyto_utils/test_feature_infer.py +++ b/tests/test_cyto_utils/test_feature_infer.py @@ -39,10 +39,14 @@ def test_feature_infer(): def test_feature_infer_nocp(): - with pytest.raises(AssertionError) as nocp: + with pytest.raises(ValueError) as nocp: infer_cp_features(population_df=non_cp_data_df) - assert "No CP features found." in str(nocp.value) + expected_message = ( + "No features found. 
Pycytominer expects CellProfiler features by default. " + "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + ) + assert expected_message in str(nocp.value) def test_metadata_feature_infer(): diff --git a/tests/test_operations/test_correlation_threshold.py b/tests/test_operations/test_correlation_threshold.py index 2ca393cc..1753f22a 100644 --- a/tests/test_operations/test_correlation_threshold.py +++ b/tests/test_operations/test_correlation_threshold.py @@ -75,7 +75,7 @@ def test_correlation_threshold_samples(): def test_correlation_threshold_featureinfer(): - with pytest.raises(AssertionError) as nocp: + with pytest.raises(ValueError) as nocp: correlation_threshold_result = correlation_threshold( population_df=data_df, features="infer", @@ -84,7 +84,11 @@ def test_correlation_threshold_featureinfer(): method="pearson", ) - assert "No CP features found." in str(nocp.value) + expected_message = ( + "No features found. Pycytominer expects CellProfiler features by default. " + "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + ) + assert expected_message in str(nocp.value) data_cp_df = data_df.copy() data_cp_df.columns = [f"Cells_{x}" for x in data_df.columns] diff --git a/tests/test_operations/test_get_na_columns.py b/tests/test_operations/test_get_na_columns.py index 9c8bd557..a097c82b 100644 --- a/tests/test_operations/test_get_na_columns.py +++ b/tests/test_operations/test_get_na_columns.py @@ -67,9 +67,13 @@ def test_get_na_columns_sample(): def test_get_na_columns_featureinfer(): - with pytest.raises(AssertionError) as nocp: + with pytest.raises(ValueError) as nocp: get_na_columns( population_df=data_df, samples="all", features="infer", cutoff=0.1 ) - assert "No CP features found." in str(nocp.value) + expected_message = ( + "No features found. Pycytominer expects CellProfiler features by default. 
" + "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + ) + assert expected_message in str(nocp.value) diff --git a/tests/test_operations/test_variance_threshold.py b/tests/test_operations/test_variance_threshold.py index a1a19764..d4838ce1 100644 --- a/tests/test_operations/test_variance_threshold.py +++ b/tests/test_operations/test_variance_threshold.py @@ -102,12 +102,16 @@ def test_variance_threshold(): def test_variance_threshold_featureinfer(): unique_cut = 0.01 - with pytest.raises(AssertionError) as nocp: + with pytest.raises(ValueError) as nocp: excluded_features = variance_threshold( population_df=data_unique_test_df, features="infer", unique_cut=unique_cut ) - assert "No CP features found." in str(nocp.value) + expected_message = ( + "No features found. Pycytominer expects CellProfiler features by default. " + "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + ) + assert expected_message in str(nocp.value) data_cp_df = data_unique_test_df.copy() data_cp_df.columns = [f"Cells_{x}" for x in data_unique_test_df.columns] From 09340252bccb835ed71e9dc897daccfa92cde6e3 Mon Sep 17 00:00:00 2001 From: axiomcura Date: Tue, 24 Sep 2024 10:11:31 -0600 Subject: [PATCH 03/14] update metadata_features docstrings --- pycytominer/cyto_utils/features.py | 4 +++- pycytominer/normalize.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pycytominer/cyto_utils/features.py b/pycytominer/cyto_utils/features.py index 642199e1..19b5a416 100644 --- a/pycytominer/cyto_utils/features.py +++ b/pycytominer/cyto_utils/features.py @@ -89,7 +89,9 @@ def infer_cp_features( compartments : list of str, default ["Cells", "Nuclei", "Cytoplasm"] Compartments from which Cell Painting features were extracted. metadata : bool, default False - Whether or not to infer metadata features. + Whether or not to infer metadata features. 
Whether or not to infer metadata features. + If metadata is set to True, pycytominer will expect CellProfiler metadata features, + identified by feature names that begin with the `Metadata_` prefix. image_features : bool, default False Whether or not the profiles contain image features. diff --git a/pycytominer/normalize.py b/pycytominer/normalize.py index 7a83ca5f..8531bb02 100644 --- a/pycytominer/normalize.py +++ b/pycytominer/normalize.py @@ -41,7 +41,8 @@ def normalize( meta_features : list A list of strings corresponding to metadata column names in the `profiles` DataFrame. All features listed must be found in `profiles`. Defaults to "infer". - If "infer", then assume metadata features are those prefixed with "Metadata" + If "infer", pycytominer will expect CellProfiler metadata features, identified by + metadata feature names that begin with the `Metadata_` prefix." samples : str The metadata column values to use as a normalization reference. We often use control samples. The function uses a pd.query() function, so you should From e1164a5cb2ff435417e967d4fc47ddd049a149bb Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 26 Sep 2024 05:17:30 -0600 Subject: [PATCH 04/14] update docstrings for feature argument --- pycytominer/consensus.py | 2 +- pycytominer/cyto_utils/modz.py | 7 ++++--- pycytominer/cyto_utils/write_gct.py | 2 +- pycytominer/feature_select.py | 4 ++-- pycytominer/normalize.py | 8 ++++---- pycytominer/operations/correlation_threshold.py | 7 ++++--- pycytominer/operations/noise_removal.py | 7 ++++--- pycytominer/operations/variance_threshold.py | 7 ++++--- 8 files changed, 24 insertions(+), 20 deletions(-) diff --git a/pycytominer/consensus.py b/pycytominer/consensus.py index 7ea19714..e8d08405 100644 --- a/pycytominer/consensus.py +++ b/pycytominer/consensus.py @@ -35,7 +35,7 @@ def consensus( features : list A list of strings corresponding to feature measurement column names in the `profiles` DataFrame. 
All features listed must be found in `profiles`. - Defaults to "infer". If "infer", then assume cell painting features are those + Defaults to "infer". If "infer", then assume CellProfiler features are those prefixed with "Cells", "Nuclei", or "Cytoplasm". output_file : str, optional If provided, will write consensus profiles to file. If not specified, will diff --git a/pycytominer/cyto_utils/modz.py b/pycytominer/cyto_utils/modz.py index 6e598ed5..6ea4c38c 100644 --- a/pycytominer/cyto_utils/modz.py +++ b/pycytominer/cyto_utils/modz.py @@ -98,9 +98,10 @@ def modz( a string or list of column(s) in the population dataframe that indicate replicate level information features : list, default "infer" - List of features present in the population dataframe [default: "infer"] - if "infer", then assume cell painting features are those that start with - "Cells_", "Nuclei_", or "Cytoplasm_". + A list of strings corresponding to feature measurement column names in the + `population_df` DataFrame. All features listed must be found in `population_df`. + Defaults to "infer". If "infer", then assume CellProfiler features are those + prefixed with "Cells", "Nuclei", or "Cytoplasm". method : str, default "spearman" indicating which correlation metric to use. min_weight : float, default 0.01 diff --git a/pycytominer/cyto_utils/write_gct.py b/pycytominer/cyto_utils/write_gct.py index 812811bd..89add1a4 100644 --- a/pycytominer/cyto_utils/write_gct.py +++ b/pycytominer/cyto_utils/write_gct.py @@ -32,7 +32,7 @@ def write_gct( features : list A list of strings corresponding to feature measurement column names in the `profiles` DataFrame. All features listed must be found in `profiles`. - Defaults to "infer". If "infer", then assume cell painting features are those + Defaults to "infer". If "infer", then assume CellProfiler features are those prefixed with "Cells", "Nuclei", or "Cytoplasm". 
meta_features : list A list of strings corresponding to metadata column names in the `profiles` diff --git a/pycytominer/feature_select.py b/pycytominer/feature_select.py index c1fd87d1..7fc1efab 100644 --- a/pycytominer/feature_select.py +++ b/pycytominer/feature_select.py @@ -43,10 +43,10 @@ def feature_select( ---------- profiles : pandas.core.frame.DataFrame or file DataFrame or file of profiles. - features : list + features : list, default "infer" A list of strings corresponding to feature measurement column names in the `profiles` DataFrame. All features listed must be found in `profiles`. - Defaults to "infer". If "infer", then assume cell painting features are those + Defaults to "infer". If "infer", then assume CellProfiler features are those prefixed with "Cells", "Nuclei", or "Cytoplasm". image_features: bool, default False Whether the profiles contain image features. diff --git a/pycytominer/normalize.py b/pycytominer/normalize.py index 8531bb02..102f7665 100644 --- a/pycytominer/normalize.py +++ b/pycytominer/normalize.py @@ -34,15 +34,15 @@ def normalize( features : list A list of strings corresponding to feature measurement column names in the `profiles` DataFrame. All features listed must be found in `profiles`. - Defaults to "infer". If "infer", then assume cell painting features are those + Defaults to "infer". If "infer", then assume CellProfiler features are those prefixed with "Cells", "Nuclei", or "Cytoplasm". image_features: bool, default False Whether the profiles contain image features. meta_features : list A list of strings corresponding to metadata column names in the `profiles` DataFrame. All features listed must be found in `profiles`. Defaults to "infer". - If "infer", pycytominer will expect CellProfiler metadata features, identified by - metadata feature names that begin with the `Metadata_` prefix." + If "infer", then assume CellProfiler metadata features, identified by + column names that begin with the `Metadata_` prefix. 
samples : str The metadata column values to use as a normalization reference. We often use control samples. The function uses a pd.query() function, so you should @@ -115,7 +115,7 @@ def normalize( normalized_df = normalize( profiles=data_df, features=["x", "y", "z", "zz"], - meta_features="infer", + meta_features=["Metadata_plate", "Metadata_treatment"], samples="Metadata_treatment == 'control'", method="standardize" ) diff --git a/pycytominer/operations/correlation_threshold.py b/pycytominer/operations/correlation_threshold.py index 7c4522ba..a888a012 100644 --- a/pycytominer/operations/correlation_threshold.py +++ b/pycytominer/operations/correlation_threshold.py @@ -20,9 +20,10 @@ def correlation_threshold( population_df : pandas.core.frame.DataFrame DataFrame that includes metadata and observation features. features : list, default "infer" - List of features present in the population dataframe [default: "infer"] - if "infer", then assume cell painting features are those that start with - "Cells_", "Nuclei_", or "Cytoplasm_". + A list of strings corresponding to feature measurement column names in the + `population_df` DataFrame. All features listed must be found in `population_df`. + Defaults to "infer". If "infer", then assume CellProfiler features are those + prefixed with "Cells", "Nuclei", or "Cytoplasm". samples : str, default "all" List of samples to perform operation on. The function uses a pd.DataFrame.query() function, so you should structure samples in this fashion. An example is diff --git a/pycytominer/operations/noise_removal.py b/pycytominer/operations/noise_removal.py index aba1a29e..e3e41923 100644 --- a/pycytominer/operations/noise_removal.py +++ b/pycytominer/operations/noise_removal.py @@ -22,9 +22,10 @@ def noise_removal( The list of unique perturbations corresponding to the rows in population_df. For example, perturb1_well1 and perturb1_well2 would both be "perturb1". 
features : list, default "infer" - List of features present in the population dataframe [default: "infer"] - if "infer", then assume cell painting features are those that start with - "Cells_", "Nuclei_", or "Cytoplasm_". + A list of strings corresponding to feature measurement column names in the + `population_df` DataFrame. All features listed must be found in `population_df`. + Defaults to "infer". If "infer", then assume CellProfiler features are those + prefixed with "Cells", "Nuclei", or "Cytoplasm". samples : str, default "all" List of samples to perform operation on. The function uses a pd.DataFrame.query() function, so you should structure samples in this fashion. An example is diff --git a/pycytominer/operations/variance_threshold.py b/pycytominer/operations/variance_threshold.py index 67d3b767..72da751d 100644 --- a/pycytominer/operations/variance_threshold.py +++ b/pycytominer/operations/variance_threshold.py @@ -18,9 +18,10 @@ def variance_threshold( population_df : pandas.core.frame.DataFrame DataFrame that includes metadata and observation features. features : list, default "infer" - List of features present in the population dataframe [default: "infer"] - if "infer", then assume cell painting features are those that start with - "Cells_", "Nuclei_", or "Cytoplasm_". + A list of strings corresponding to feature measurement column names in the + `population_df` DataFrame. All features listed must be found in `population_df`. + Defaults to "infer". If "infer", then assume CellProfiler features are those + prefixed with "Cells", "Nuclei", or "Cytoplasm". samples : str, default "all" List of samples to perform operation on. The function uses a pd.DataFrame.query() function, so you should structure samples in this fashion. 
An example is From 7a01a39e7800c21fe49a6850e1083d6285033286 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 26 Sep 2024 05:18:30 -0600 Subject: [PATCH 05/14] update features docstring and remove unused indent --- pycytominer/operations/get_na_columns.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pycytominer/operations/get_na_columns.py b/pycytominer/operations/get_na_columns.py index ad36c377..f288f2cd 100644 --- a/pycytominer/operations/get_na_columns.py +++ b/pycytominer/operations/get_na_columns.py @@ -14,9 +14,10 @@ def get_na_columns(population_df, features="infer", samples="all", cutoff=0.05): population_df : pandas.core.frame.DataFrame DataFrame that includes metadata and observation features. features : list, default "infer" - List of features present in the population dataframe [default: "infer"] - if "infer", then assume cell painting features are those that start with - "Cells_", "Nuclei_", or "Cytoplasm_". + A list of strings corresponding to feature measurement column names in the + `population_df` DataFrame. All features listed must be found in `population_df`. + Defaults to "infer". If "infer", then assume CellProfiler features are those + prefixed with "Cells", "Nuclei", or "Cytoplasm". samples : str, default "all" List of samples to perform operation on. The function uses a pd.DataFrame.query() function, so you should structure samples in this fashion. 
An example is @@ -36,8 +37,8 @@ def get_na_columns(population_df, features="infer", samples="all", cutoff=0.05): if features == "infer": features = infer_cp_features(population_df) - else: - population_df = population_df.loc[:, features] + + population_df = population_df.loc[:, features] num_rows = population_df.shape[0] na_prop_df = population_df.isna().sum() / num_rows From aebc61bad59f4dd52c1c3c0d33ba102be031fcd1 Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 26 Sep 2024 05:20:34 -0600 Subject: [PATCH 06/14] clarify error message to avoid confusing recommendation to update features parameter since it does not exist in the function --- pycytominer/cyto_utils/features.py | 15 ++++++++------- tests/test_cyto_utils/test_feature_infer.py | 5 +++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pycytominer/cyto_utils/features.py b/pycytominer/cyto_utils/features.py index 19b5a416..63cc3d68 100644 --- a/pycytominer/cyto_utils/features.py +++ b/pycytominer/cyto_utils/features.py @@ -80,7 +80,7 @@ def infer_cp_features( metadata=False, image_features=False, ): - """Given a dataframe, output features that we expect to be Cell Painting features. + """Given a CellProfiler input dataframe, output feature column names as a list. Parameters ---------- @@ -89,9 +89,9 @@ def infer_cp_features( compartments : list of str, default ["Cells", "Nuclei", "Cytoplasm"] Compartments from which Cell Painting features were extracted. metadata : bool, default False - Whether or not to infer metadata features. Whether or not to infer metadata features. - If metadata is set to True, pycytominer will expect CellProfiler metadata features, - identified by feature names that begin with the `Metadata_` prefix. + Whether or not to infer metadata features. + If metadata is set to True, find column names that begin with the `Metadata_` prefix. + This convention is expected by CellProfiler defaults. 
image_features : bool, default False Whether or not the profiles contain image features. @@ -119,8 +119,9 @@ def infer_cp_features( if len(features) == 0: raise ValueError( - "No features found. Pycytominer expects CellProfiler features by default. " - "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + "No features or metadata found. Pycytominer expects CellProfiler column names by default. " + "If you're using non-CellProfiler data, please do not 'infer' features. " + "Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually." ) return features @@ -154,7 +155,7 @@ def drop_outlier_features( population_df : pandas.core.frame.DataFrame DataFrame that includes metadata and observation features. features : list of str or str, default "infer" - Features present in the population dataframe. If "infer", then assume Cell Painting features are those that start with "Cells_", "Nuclei_", or "Cytoplasm_" + Features present in the population dataframe. If "infer", then assume CellProfiler feature conventions (start with "Cells_", "Nuclei_", or "Cytoplasm_") samples : str, default "all" List of samples to perform operation on. The function uses a pd.DataFrame.query() function, so you should structure samples in this fashion. An example is diff --git a/tests/test_cyto_utils/test_feature_infer.py b/tests/test_cyto_utils/test_feature_infer.py index bb951986..9f0ddeca 100644 --- a/tests/test_cyto_utils/test_feature_infer.py +++ b/tests/test_cyto_utils/test_feature_infer.py @@ -43,8 +43,9 @@ def test_feature_infer_nocp(): infer_cp_features(population_df=non_cp_data_df) expected_message = ( - "No features found. Pycytominer expects CellProfiler features by default. " - "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + "No features or metadata found. Pycytominer expects CellProfiler column names by default. 
" + "If you're using non-CellProfiler data, please do not 'infer' features. " + "Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually." ) assert expected_message in str(nocp.value) From aac7fead416bc5660c6a9e3b7100fcf2bdfe061e Mon Sep 17 00:00:00 2001 From: Gregory Way Date: Thu, 26 Sep 2024 15:41:23 -0600 Subject: [PATCH 07/14] update error message in tests --- tests/test_operations/test_correlation_threshold.py | 5 +++-- tests/test_operations/test_get_na_columns.py | 5 +++-- tests/test_operations/test_variance_threshold.py | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/test_operations/test_correlation_threshold.py b/tests/test_operations/test_correlation_threshold.py index 1753f22a..35f82ee2 100644 --- a/tests/test_operations/test_correlation_threshold.py +++ b/tests/test_operations/test_correlation_threshold.py @@ -85,8 +85,9 @@ def test_correlation_threshold_featureinfer(): ) expected_message = ( - "No features found. Pycytominer expects CellProfiler features by default. " - "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + "No features or metadata found. Pycytominer expects CellProfiler column names by default. " + "If you're using non-CellProfiler data, please do not 'infer' features. " + "Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually." ) assert expected_message in str(nocp.value) diff --git a/tests/test_operations/test_get_na_columns.py b/tests/test_operations/test_get_na_columns.py index a097c82b..f64d8dd1 100644 --- a/tests/test_operations/test_get_na_columns.py +++ b/tests/test_operations/test_get_na_columns.py @@ -73,7 +73,8 @@ def test_get_na_columns_featureinfer(): ) expected_message = ( - "No features found. Pycytominer expects CellProfiler features by default. 
" - "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + "No features or metadata found. Pycytominer expects CellProfiler column names by default. " + "If you're using non-CellProfiler data, please do not 'infer' features. " + "Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually." ) assert expected_message in str(nocp.value) diff --git a/tests/test_operations/test_variance_threshold.py b/tests/test_operations/test_variance_threshold.py index d4838ce1..1c7388b5 100644 --- a/tests/test_operations/test_variance_threshold.py +++ b/tests/test_operations/test_variance_threshold.py @@ -108,8 +108,9 @@ def test_variance_threshold_featureinfer(): ) expected_message = ( - "No features found. Pycytominer expects CellProfiler features by default. " - "If you're using non-CellProfiler data, please specify the feature space using the `features` parameter." + "No features or metadata found. Pycytominer expects CellProfiler column names by default. " + "If you're using non-CellProfiler data, please do not 'infer' features. " + "Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually." ) assert expected_message in str(nocp.value) From bdac5766fd1f0c1f2f3a84cb9c2021c632849306 Mon Sep 17 00:00:00 2001 From: Erik Serrano <31600622+axiomcura@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:01:51 -0600 Subject: [PATCH 08/14] Update pycytominer/consensus.py Co-authored-by: Dave Bunten --- pycytominer/consensus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycytominer/consensus.py b/pycytominer/consensus.py index e8d08405..de0386e5 100644 --- a/pycytominer/consensus.py +++ b/pycytominer/consensus.py @@ -35,7 +35,7 @@ def consensus( features : list A list of strings corresponding to feature measurement column names in the `profiles` DataFrame. All features listed must be found in `profiles`. 
- Defaults to "infer". If "infer", then assume CellProfiler features are those + Defaults to "infer". If "infer", then assume features are from CellProfiler output and prefixed with "Cells", "Nuclei", or "Cytoplasm". output_file : str, optional If provided, will write consensus profiles to file. If not specified, will From 42ae1ba4ea039d3bd72e973caa5f62cbc5f3ba93 Mon Sep 17 00:00:00 2001 From: Erik Serrano <31600622+axiomcura@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:02:07 -0600 Subject: [PATCH 09/14] Update pycytominer/cyto_utils/features.py Co-authored-by: Dave Bunten --- pycytominer/cyto_utils/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycytominer/cyto_utils/features.py b/pycytominer/cyto_utils/features.py index 63cc3d68..5ebf2525 100644 --- a/pycytominer/cyto_utils/features.py +++ b/pycytominer/cyto_utils/features.py @@ -80,7 +80,7 @@ def infer_cp_features( metadata=False, image_features=False, ): - """Given a CellProfiler input dataframe, output feature column names as a list. + """Given CellProfiler output data read as a DataFrame, output feature column names as a list. Parameters ---------- From 3131b21967f25d6e59b36dbaba7a9eea5813956c Mon Sep 17 00:00:00 2001 From: Erik Serrano <31600622+axiomcura@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:02:14 -0600 Subject: [PATCH 10/14] Update pycytominer/cyto_utils/features.py Co-authored-by: Dave Bunten --- pycytominer/cyto_utils/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycytominer/cyto_utils/features.py b/pycytominer/cyto_utils/features.py index 5ebf2525..e3e302b9 100644 --- a/pycytominer/cyto_utils/features.py +++ b/pycytominer/cyto_utils/features.py @@ -155,7 +155,7 @@ def drop_outlier_features( population_df : pandas.core.frame.DataFrame DataFrame that includes metadata and observation features. features : list of str or str, default "infer" - Features present in the population dataframe. 
If "infer", then assume CellProfiler feature conventions (start with "Cells_", "Nuclei_", or "Cytoplasm_") + Features present in the population dataframe. If "infer", then assume CellProfiler feature naming conventions (start with "Cells_", "Nuclei_", or "Cytoplasm_") samples : str, default "all" List of samples to perform operation on. The function uses a pd.DataFrame.query() function, so you should structure samples in this fashion. An example is From c452a26dee1aa118a657b96d8fe3e6710c017e35 Mon Sep 17 00:00:00 2001 From: Erik Serrano <31600622+axiomcura@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:02:55 -0600 Subject: [PATCH 11/14] Update pycytominer/normalize.py Co-authored-by: Dave Bunten --- pycytominer/normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycytominer/normalize.py b/pycytominer/normalize.py index 102f7665..06c55fb6 100644 --- a/pycytominer/normalize.py +++ b/pycytominer/normalize.py @@ -34,7 +34,7 @@ def normalize( features : list A list of strings corresponding to feature measurement column names in the `profiles` DataFrame. All features listed must be found in `profiles`. - Defaults to "infer". If "infer", then assume CellProfiler features are those + Defaults to "infer". If "infer", then assume features are from CellProfiler output and prefixed with "Cells", "Nuclei", or "Cytoplasm". image_features: bool, default False Whether the profiles contain image features. 
From 4c0d109ca4c86b8c4e3484166e5b56ca34e862cd Mon Sep 17 00:00:00 2001 From: Erik Serrano <31600622+axiomcura@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:39:15 -0600 Subject: [PATCH 12/14] Update pycytominer/cyto_utils/write_gct.py Co-authored-by: Dave Bunten --- pycytominer/cyto_utils/write_gct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycytominer/cyto_utils/write_gct.py b/pycytominer/cyto_utils/write_gct.py index 89add1a4..1feaab38 100644 --- a/pycytominer/cyto_utils/write_gct.py +++ b/pycytominer/cyto_utils/write_gct.py @@ -32,7 +32,7 @@ def write_gct( features : list A list of strings corresponding to feature measurement column names in the `profiles` DataFrame. All features listed must be found in `profiles`. - Defaults to "infer". If "infer", then assume CellProfiler features are those + Defaults to "infer". If "infer", then assume features are from CellProfiler output and prefixed with "Cells", "Nuclei", or "Cytoplasm". meta_features : list A list of strings corresponding to metadata column names in the `profiles` From e42181852c3b43418de93dbf9636606cd5a39923 Mon Sep 17 00:00:00 2001 From: axiomcura Date: Fri, 27 Sep 2024 14:58:24 -0600 Subject: [PATCH 13/14] made function docs multi-lined --- pycytominer/cyto_utils/features.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pycytominer/cyto_utils/features.py b/pycytominer/cyto_utils/features.py index e3e302b9..1d1e4d0d 100644 --- a/pycytominer/cyto_utils/features.py +++ b/pycytominer/cyto_utils/features.py @@ -155,7 +155,9 @@ def drop_outlier_features( population_df : pandas.core.frame.DataFrame DataFrame that includes metadata and observation features. features : list of str or str, default "infer" - Features present in the population dataframe. If "infer", then assume CellProfiler feature naming conventions (start with "Cells_", "Nuclei_", or "Cytoplasm_") + Features present in the population dataframe. 
If "infer", + then assume CellProfiler feature conventions + (start with "Cells_", "Nuclei_", or "Cytoplasm_") samples : str, default "all" List of samples to perform operation on. The function uses a pd.DataFrame.query() function, so you should structure samples in this fashion. An example is From 5f4ac6e931564e626b6c20ed698b9749b82d28e4 Mon Sep 17 00:00:00 2001 From: axiomcura Date: Fri, 27 Sep 2024 15:05:30 -0600 Subject: [PATCH 14/14] updated test with error messages --- tests/test_cyto_utils/test_feature_infer.py | 7 +------ tests/test_operations/test_correlation_threshold.py | 7 +------ tests/test_operations/test_get_na_columns.py | 7 +------ tests/test_operations/test_variance_threshold.py | 7 +------ 4 files changed, 4 insertions(+), 24 deletions(-) diff --git a/tests/test_cyto_utils/test_feature_infer.py b/tests/test_cyto_utils/test_feature_infer.py index 9f0ddeca..9c8e8e2a 100644 --- a/tests/test_cyto_utils/test_feature_infer.py +++ b/tests/test_cyto_utils/test_feature_infer.py @@ -42,12 +42,7 @@ def test_feature_infer_nocp(): with pytest.raises(ValueError) as nocp: infer_cp_features(population_df=non_cp_data_df) - expected_message = ( - "No features or metadata found. Pycytominer expects CellProfiler column names by default. " - "If you're using non-CellProfiler data, please do not 'infer' features. " - "Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually." - ) - assert expected_message in str(nocp.value) + assert "No features or metadata found." 
in str(nocp.value) def test_metadata_feature_infer(): diff --git a/tests/test_operations/test_correlation_threshold.py b/tests/test_operations/test_correlation_threshold.py index 35f82ee2..9845b99f 100644 --- a/tests/test_operations/test_correlation_threshold.py +++ b/tests/test_operations/test_correlation_threshold.py @@ -84,12 +84,7 @@ def test_correlation_threshold_featureinfer(): method="pearson", ) - expected_message = ( - "No features or metadata found. Pycytominer expects CellProfiler column names by default. " - "If you're using non-CellProfiler data, please do not 'infer' features. " - "Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually." - ) - assert expected_message in str(nocp.value) + assert "No features or metadata found." in str(nocp.value) data_cp_df = data_df.copy() data_cp_df.columns = [f"Cells_{x}" for x in data_df.columns] diff --git a/tests/test_operations/test_get_na_columns.py b/tests/test_operations/test_get_na_columns.py index f64d8dd1..48b5ab71 100644 --- a/tests/test_operations/test_get_na_columns.py +++ b/tests/test_operations/test_get_na_columns.py @@ -72,9 +72,4 @@ def test_get_na_columns_featureinfer(): population_df=data_df, samples="all", features="infer", cutoff=0.1 ) - expected_message = ( - "No features or metadata found. Pycytominer expects CellProfiler column names by default. " - "If you're using non-CellProfiler data, please do not 'infer' features. " - "Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually." - ) - assert expected_message in str(nocp.value) + assert "No features or metadata found." 
in str(nocp.value) diff --git a/tests/test_operations/test_variance_threshold.py b/tests/test_operations/test_variance_threshold.py index 1c7388b5..1d7cd481 100644 --- a/tests/test_operations/test_variance_threshold.py +++ b/tests/test_operations/test_variance_threshold.py @@ -107,12 +107,7 @@ def test_variance_threshold_featureinfer(): population_df=data_unique_test_df, features="infer", unique_cut=unique_cut ) - expected_message = ( - "No features or metadata found. Pycytominer expects CellProfiler column names by default. " - "If you're using non-CellProfiler data, please do not 'infer' features. " - "Instead, check if the function has a `features` or `meta_features` parameter, and input column names manually." - ) - assert expected_message in str(nocp.value) + assert "No features or metadata found." in str(nocp.value) data_cp_df = data_unique_test_df.copy() data_cp_df.columns = [f"Cells_{x}" for x in data_unique_test_df.columns]