From 264cf6be83ef29672537cd3e9f5962a0e0db1215 Mon Sep 17 00:00:00 2001 From: larsevj Date: Fri, 11 Oct 2024 16:08:38 +0200 Subject: [PATCH 1/3] Add default values using Pandas assign in design_matrix --- src/ert/config/design_matrix.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/ert/config/design_matrix.py b/src/ert/config/design_matrix.py index 1fac2bd96e4..346ac94cba4 100644 --- a/src/ert/config/design_matrix.py +++ b/src/ert/config/design_matrix.py @@ -112,12 +112,10 @@ def read_design_matrix( error_msg = "\n".join(error_list) raise ValueError(f"Design matrix is not valid, error:\n{error_msg}") - defaults = DesignMatrix._read_defaultssheet( - self.xls_filename, self.default_sheet + usable_defaults = DesignMatrix._read_defaultssheet( + self.xls_filename, self.default_sheet, design_matrix_df.columns.to_list() ) - for k, v in defaults.items(): - if k not in design_matrix_df.columns: - design_matrix_df[k] = v + design_matrix_df = design_matrix_df.assign(**usable_defaults) parameter_configuration: Dict[str, ParameterConfig] = {} transform_function_definitions: List[TransformFunctionDefinition] = [] @@ -200,13 +198,16 @@ def _validate_design_matrix(design_matrix: pd.DataFrame) -> List[str]: @staticmethod def _read_defaultssheet( - xls_filename: Union[Path, str], defaults_sheetname: str + xls_filename: Union[Path, str], + defaults_sheetname: str, + existing_parameters: List[str], ) -> Dict[str, Union[str, float]]: """ Construct a dict of keys and values to be used as defaults from the - first two columns in a spreadsheet. + first two columns in a spreadsheet. Only returns the keys that are + different from the existing parameters. 
- Returns a dict of default values + Returns a dict of usable default values :raises: ValueError if defaults sheet is non-empty but non-parsable """ @@ -230,7 +231,11 @@ def _read_defaultssheet( if not default_df[0].is_unique: raise ValueError("Default sheet contains duplicate parameter names") - return {row[0]: convert_to_numeric(row[1]) for _, row in default_df.iterrows()} + return { + row[0]: convert_to_numeric(row[1]) + for _, row in default_df.iterrows() + if row[0] not in existing_parameters + } def convert_to_numeric(x: str) -> Union[str, float]: From 5daf3ded8b4ff687cea3a512fb6d599d3428743b Mon Sep 17 00:00:00 2001 From: larsevj Date: Tue, 15 Oct 2024 14:02:39 +0100 Subject: [PATCH 2/3] Reword according to review --- src/ert/config/design_matrix.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ert/config/design_matrix.py b/src/ert/config/design_matrix.py index 346ac94cba4..9ef318e9c9f 100644 --- a/src/ert/config/design_matrix.py +++ b/src/ert/config/design_matrix.py @@ -112,10 +112,10 @@ def read_design_matrix( error_msg = "\n".join(error_list) raise ValueError(f"Design matrix is not valid, error:\n{error_msg}") - usable_defaults = DesignMatrix._read_defaultssheet( + defaults_to_use = DesignMatrix._read_defaultssheet( self.xls_filename, self.default_sheet, design_matrix_df.columns.to_list() ) - design_matrix_df = design_matrix_df.assign(**usable_defaults) + design_matrix_df = design_matrix_df.assign(**defaults_to_use) parameter_configuration: Dict[str, ParameterConfig] = {} transform_function_definitions: List[TransformFunctionDefinition] = [] @@ -207,7 +207,7 @@ def _read_defaultssheet( first two columns in a spreadsheet. Only returns the keys that are different from the existing parameters. 
- Returns a dict of usable default values + Returns a dict of default values :raises: ValueError if defaults sheet is non-empty but non-parsable """ From 2a494447f9c6a4228c95e201fec0c3d58cd16cc9 Mon Sep 17 00:00:00 2001 From: larsevj Date: Thu, 31 Oct 2024 16:00:27 +0100 Subject: [PATCH 3/3] Add test for used default values --- .../test_design_matrix.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py b/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py index 7b982513c91..2326f76bc3a 100644 --- a/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py +++ b/tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py @@ -183,3 +183,30 @@ def test_reading_default_sheet_validation(tmp_path, data, error_msg): design_matrix = DesignMatrix(design_path, "DesignSheet01", "DefaultValues") with pytest.raises(ValueError, match=error_msg): design_matrix.read_design_matrix() + + +def test_default_values_used(tmp_path): + design_path = tmp_path / "design_matrix.xlsx" + design_matrix_df = pd.DataFrame( + { + "REAL": [0, 1, 2, 3], + "a": [1, 2, 3, 4], + "b": [0, 2, 0, 1], + "c": ["low", "high", "medium", "low"], + } + ) + default_sheet_df = pd.DataFrame([["one", 1], ["b", 4], ["d", "case_name"]]) + with pd.ExcelWriter(design_path) as xl_write: + design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01") + default_sheet_df.to_excel( + xl_write, index=False, sheet_name="DefaultValues", header=False + ) + design_matrix = DesignMatrix(design_path, "DesignSheet01", "DefaultValues") + design_matrix.read_design_matrix() + df = design_matrix.design_matrix_df + np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "one"], np.array([1, 1, 1, 1])) + np.testing.assert_equal(df[DESIGN_MATRIX_GROUP, "b"], np.array([0, 2, 0, 1])) + np.testing.assert_equal( + df[DESIGN_MATRIX_GROUP, "d"], + np.array(["case_name", "case_name", "case_name", "case_name"]), + )