From 3f8348cc9b4d3fb0d188b5d99a9749675780fefe Mon Sep 17 00:00:00 2001 From: Hillary Tsang Date: Fri, 16 Oct 2020 15:41:41 -0400 Subject: [PATCH 1/5] added assertion error for missing replicates --- cytominer_eval/operations/percent_strong.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cytominer_eval/operations/percent_strong.py b/cytominer_eval/operations/percent_strong.py index c8902cd..c36f9a7 100644 --- a/cytominer_eval/operations/percent_strong.py +++ b/cytominer_eval/operations/percent_strong.py @@ -29,6 +29,7 @@ def percent_strong( A metric describing the proportion of replicates that correlate above the given quantile of non-replicate correlation distribution """ + assert 0 < quantile and 1 >= quantile, "quantile must be between 0 and 1" similarity_melted_df = assign_replicates( @@ -38,13 +39,17 @@ def percent_strong( # Check to make sure that the melted dataframe is upper triangle assert_melt(similarity_melted_df, eval_metric="percent_strong") + # check that there ARE group_replicates (non-unique rows) + replicate_df = similarity_melted_df.query("group_replicate") + denom = replicate_df.shape[0] + + ### HYT's addition, though i dont know if this will work + assert denom != 0, "no replicate groups identified!" 
+ non_replicate_quantile = similarity_melted_df.query( "not group_replicate" ).similarity_metric.quantile(quantile) - replicate_df = similarity_melted_df.query("group_replicate") - denom = replicate_df.shape[0] - percent_strong = ( replicate_df.similarity_metric > non_replicate_quantile ).sum() / denom From 6ef08ad3b06793a17bb5b9f48c4546cdcef6621a Mon Sep 17 00:00:00 2001 From: hillsbury Date: Mon, 2 Nov 2020 21:18:06 -0500 Subject: [PATCH 2/5] Apply suggestions from code review Co-authored-by: Greg Way --- cytominer_eval/operations/percent_strong.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cytominer_eval/operations/percent_strong.py b/cytominer_eval/operations/percent_strong.py index c36f9a7..a06bd85 100644 --- a/cytominer_eval/operations/percent_strong.py +++ b/cytominer_eval/operations/percent_strong.py @@ -39,12 +39,12 @@ def percent_strong( # Check to make sure that the melted dataframe is upper triangle assert_melt(similarity_melted_df, eval_metric="percent_strong") - # check that there ARE group_replicates (non-unique rows) + # check that there are group_replicates (non-unique rows) replicate_df = similarity_melted_df.query("group_replicate") denom = replicate_df.shape[0] ### HYT's addition, though i dont know if this will work - assert denom != 0, "no replicate groups identified!" 
+ assert denom != 0, "no replicate groups identified in {rep} columns!".format(rep=replicate_groups) non_replicate_quantile = similarity_melted_df.query( "not group_replicate" From 8281e1930bfc9e904d59ee4bc547aafd55b80b24 Mon Sep 17 00:00:00 2001 From: Hillary Tsang Date: Tue, 3 Nov 2020 09:53:00 -0500 Subject: [PATCH 3/5] adding test for unique cols --- .../test_operations/test_percent_strong.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cytominer_eval/tests/test_operations/test_percent_strong.py b/cytominer_eval/tests/test_operations/test_percent_strong.py index 892b889..1cbffc7 100644 --- a/cytominer_eval/tests/test_operations/test_percent_strong.py +++ b/cytominer_eval/tests/test_operations/test_percent_strong.py @@ -53,3 +53,26 @@ def test_percent_strong(): expected_result = 0.3074 assert np.round(output, 4) == expected_result + + +def test_percent_strong_uniquerows(): + with pytest.raises(AssertionError) as err: + replicate_groups = ["Metadata_pert_well"] + output = percent_strong( + similarity_melted_df=similarity_melted_df, + replicate_groups=replicate_groups, + quantile=0.95, + ) + assert "no replicate groups identified in {rep} columns!".format( + rep=replicate_groups + ) in str(err.value) + + ### REFERENCE CODE + # with pytest.raises(AssertionError) as ve: + # replicate_groups = ["MISSING_COLUMN"] + # result = percent_strong( + # similarity_melted_df=similarity_melted_df, + # replicate_groups=replicate_groups, + # quantile=0.95, + # ) + # assert "replicate_group not found in melted dataframe columns" in str(ve.value) From 4b07c15d3df87113b39af4f16e1360aa2e342a95 Mon Sep 17 00:00:00 2001 From: Hillary Tsang Date: Tue, 3 Nov 2020 09:54:42 -0500 Subject: [PATCH 4/5] adding test for unique cols --- .../tests/test_operations/test_percent_strong.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/cytominer_eval/tests/test_operations/test_percent_strong.py b/cytominer_eval/tests/test_operations/test_percent_strong.py 
index 1cbffc7..5084b7c 100644 --- a/cytominer_eval/tests/test_operations/test_percent_strong.py +++ b/cytominer_eval/tests/test_operations/test_percent_strong.py @@ -66,13 +66,3 @@ def test_percent_strong_uniquerows(): assert "no replicate groups identified in {rep} columns!".format( rep=replicate_groups ) in str(err.value) - - ### REFERENCE CODE - # with pytest.raises(AssertionError) as ve: - # replicate_groups = ["MISSING_COLUMN"] - # result = percent_strong( - # similarity_melted_df=similarity_melted_df, - # replicate_groups=replicate_groups, - # quantile=0.95, - # ) - # assert "replicate_group not found in melted dataframe columns" in str(ve.value) From df05b4d92745c0b0b28f1afa0fd64814554eb9b0 Mon Sep 17 00:00:00 2001 From: Hillary Tsang Date: Tue, 3 Nov 2020 10:25:39 -0500 Subject: [PATCH 5/5] removing unnecessary comment --- cytominer_eval/operations/percent_strong.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cytominer_eval/operations/percent_strong.py b/cytominer_eval/operations/percent_strong.py index a06bd85..6b7a20e 100644 --- a/cytominer_eval/operations/percent_strong.py +++ b/cytominer_eval/operations/percent_strong.py @@ -43,8 +43,9 @@ def percent_strong( replicate_df = similarity_melted_df.query("group_replicate") denom = replicate_df.shape[0] - ### HYT's addition, though i dont know if this will work - assert denom != 0, "no replicate groups identified in {rep} columns!".format(rep=replicate_groups) + assert denom != 0, "no replicate groups identified in {rep} columns!".format( + rep=replicate_groups + ) non_replicate_quantile = similarity_melted_df.query( "not group_replicate"