From c6c91e93cb3548a8e1dffbc42bc1847a023e5560 Mon Sep 17 00:00:00 2001
From: Nicole Deflaux <deflaux@verily.com>
Date: Thu, 10 Nov 2022 19:20:09 +0000
Subject: [PATCH] Use floats with less precision in the normalization steps.

---
 pipelines/mining/cytomining_jumpcp.wdl | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pipelines/mining/cytomining_jumpcp.wdl b/pipelines/mining/cytomining_jumpcp.wdl
index c87608b..ca1fd5b 100644
--- a/pipelines/mining/cytomining_jumpcp.wdl
+++ b/pipelines/mining/cytomining_jumpcp.wdl
@@ -201,7 +201,7 @@ task profiling {
     import time
     import pandas as pd
     from pycytominer.cyto_utils.cells import SingleCells
-    from pycytominer.cyto_utils.load import load_platemap
+    from pycytominer.cyto_utils.load import load_profiles
     from pycytominer.cyto_utils import output
     from pycytominer import normalize, annotate
     
@@ -225,14 +225,15 @@ task profiling {
 
     print("-----[ Aggregating profiles, this takes a long time. ]----- ")
     start = time.time()
-    aggregated_df = sc.aggregate_profiles()
-    output(aggregated_df, "~{agg_filename}", float_format=FLOAT_FORMAT, compression_options=COMPRESSION)
+    output(sc.aggregate_profiles(), "~{agg_filename}", float_format=FLOAT_FORMAT, compression_options=COMPRESSION)
     print("Time: " + str(time.time() - start))
 
     print("-----[ Annotating with metadata. ]-----")
     start = time.time()
     annotated_df = annotate(
-        profiles=aggregated_df,
+        # Read in the profiles instead of using the dataframe in memory so that the lower-precision floats
+        # are used in the normalization step. Do this for consistency with other implementations of this pipeline.
+        profiles=load_profiles("~{agg_filename}"),
         platemap="~{merged_metadata_filename}",
         join_on = [add_prefix_if_missing("~{plate_map_join_col_left}"),
                    add_prefix_if_missing("~{plate_map_join_col_right}")])