From c6c91e93cb3548a8e1dffbc42bc1847a023e5560 Mon Sep 17 00:00:00 2001 From: Nicole Deflaux Date: Thu, 10 Nov 2022 19:20:09 +0000 Subject: [PATCH] Use floats with less precision in the normalization steps. --- pipelines/mining/cytomining_jumpcp.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pipelines/mining/cytomining_jumpcp.wdl b/pipelines/mining/cytomining_jumpcp.wdl index c87608b..ca1fd5b 100644 --- a/pipelines/mining/cytomining_jumpcp.wdl +++ b/pipelines/mining/cytomining_jumpcp.wdl @@ -201,7 +201,7 @@ task profiling { import time import pandas as pd from pycytominer.cyto_utils.cells import SingleCells - from pycytominer.cyto_utils.load import load_platemap + from pycytominer.cyto_utils.load import load_profiles from pycytominer.cyto_utils import output from pycytominer import normalize, annotate @@ -225,14 +225,15 @@ task profiling { print("-----[ Aggregating profiles, this takes a long time. ]----- ") start = time.time() - aggregated_df = sc.aggregate_profiles() - output(aggregated_df, "~{agg_filename}", float_format=FLOAT_FORMAT, compression_options=COMPRESSION) + output(sc.aggregate_profiles(), "~{agg_filename}", float_format=FLOAT_FORMAT, compression_options=COMPRESSION) print("Time: " + str(time.time() - start)) print("-----[ Annotating with metadata. ]-----") start = time.time() annotated_df = annotate( - profiles=aggregated_df, + # Read in the profiles instead of using the dataframe in memory so that the lower-precision floats + # are used in the normalization step. Do this for consistency with other implementations of this pipeline. + profiles=load_profiles("~{agg_filename}"), platemap="~{merged_metadata_filename}", join_on = [add_prefix_if_missing("~{plate_map_join_col_left}"), add_prefix_if_missing("~{plate_map_join_col_right}")])