From e38a3e7d967a1f3633ea17e5e9b18aa3c88ad4ba Mon Sep 17 00:00:00 2001 From: innixma Date: Fri, 11 Oct 2024 23:16:06 +0000 Subject: [PATCH] Update repo.evaluate_ensemble to return DataFrame --- tabrepo/repository/ensemble_mixin.py | 28 ++++++++++++++++------------ tst/test_repository.py | 14 +++++++------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/tabrepo/repository/ensemble_mixin.py b/tabrepo/repository/ensemble_mixin.py index 4b8a310d..22d0d879 100644 --- a/tabrepo/repository/ensemble_mixin.py +++ b/tabrepo/repository/ensemble_mixin.py @@ -26,7 +26,7 @@ def evaluate_ensemble( rank: bool = True, folds: list[int] | None = None, backend: str = "ray", - ) -> Tuple[pd.Series, pd.DataFrame]: + ) -> Tuple[pd.DataFrame, pd.DataFrame]: """ :param datasets: list of datasets to compute errors on. :param configs: list of config to consider for ensembling. Uses all configs if None. @@ -58,22 +58,29 @@ def evaluate_ensemble( backend=backend, ) + dataset_folds = [(self.task_to_dataset(task=task), self.task_to_fold(task=task)) for task in tasks] dict_errors, dict_ensemble_weights = scorer.compute_errors(configs=configs) + metric_error_list = [dict_errors[task] for task in tasks] + datasets_info = self.datasets_info(datasets=datasets) + dataset_metric_list = [datasets_info.loc[d]["metric"] for d, f in dataset_folds] + problem_type_list = [datasets_info.loc[d]["problem_type"] for d, f in dataset_folds] + + output_dict = { + "metric_error": metric_error_list, + "metric": dataset_metric_list, + "problem_type": problem_type_list, + } if rank: - dict_scores = scorer.compute_ranks(errors=dict_errors) - out = dict_scores - else: - out = dict_errors + dict_ranks = scorer.compute_ranks(errors=dict_errors) + rank_list = [dict_ranks[task] for task in tasks] + output_dict["rank"] = rank_list - dataset_folds = [(self.task_to_dataset(task=task), self.task_to_fold(task=task)) for task in tasks] ensemble_weights = [dict_ensemble_weights[task] for task in tasks] - out_list = [out[task] for task in tasks] multiindex = pd.MultiIndex.from_tuples(dataset_folds, names=["dataset", "fold"]) - df_name = "rank" if rank else "metric_error" - df_out = pd.Series(data=out_list, index=multiindex, name=df_name) + df_out = pd.DataFrame(data=output_dict, index=multiindex) df_ensemble_weights = pd.DataFrame(data=ensemble_weights, index=multiindex, columns=configs) return df_out, df_ensemble_weights @@ -161,11 +168,8 @@ def evaluate_ensemble_with_time( time_infer_s = sum(latencies.values()) task_time_map[(dataset, fold)] = {"time_train_s": time_train_s, "time_infer_s": time_infer_s} - df_out = df_out.to_frame() df_task_time = pd.DataFrame(task_time_map).T df_out[["time_train_s", "time_infer_s"]] = df_task_time - df_datasets_info = self.datasets_info(datasets=[dataset]) - df_out = df_out.join(df_datasets_info, how="inner") df_datasets_to_tids = self.datasets_to_tids(datasets=[dataset]).to_frame() df_datasets_to_tids.index.name = "dataset" df_out = df_out.join(df_datasets_to_tids, how="inner") diff --git a/tst/test_repository.py b/tst/test_repository.py index c4001d88..6985137a 100644 --- a/tst/test_repository.py +++ b/tst/test_repository.py @@ -61,7 +61,7 @@ def test_repository(): assert repo.labels_test(dataset=dataset, fold=2).shape == (13,) assert repo.dataset_metadata(dataset=dataset) == {'dataset': dataset, 'task_type': 'TaskType.SUPERVISED_CLASSIFICATION'} result_errors, result_ensemble_weights = repo.evaluate_ensemble(datasets=[dataset], configs=[config, config], ensemble_size=5, backend="native") - assert result_errors.shape == (3,) + assert result_errors.shape == (3, 4) assert len(result_ensemble_weights) == 3 dataset_info = repo.dataset_info(dataset=dataset) @@ -76,12 +76,12 @@ def test_repository(): result_errors_w_max_models, result_ensemble_weights_w_max_models = repo.evaluate_ensemble( datasets=[dataset], configs=[config, config], ensemble_size=5, backend="native", ensemble_kwargs={"max_models_per_type": 1} ) - assert result_errors_w_max_models.shape == (3,) + assert result_errors_w_max_models.shape == (3, 4) assert len(result_ensemble_weights_w_max_models) == 3 assert np.allclose(result_ensemble_weights_w_max_models.loc[(dataset, 0)], [1.0, 0.0]) assert repo.evaluate_ensemble(datasets=[dataset], configs=[config, config], - ensemble_size=5, folds=[2], backend="native")[0].shape == (1,) + ensemble_size=5, folds=[2], backend="native")[0].shape == (1, 4) repo: EvaluationRepository = repo.subset(folds=[0, 2]) assert repo.datasets() == ['abalone', 'ada'] @@ -93,9 +93,9 @@ def test_repository(): assert repo.predict_test(dataset=dataset, config=config, fold=2).shape == (13, 25) assert repo.dataset_metadata(dataset=dataset) == {'dataset': dataset, 'task_type': 'TaskType.SUPERVISED_CLASSIFICATION'} # result_errors, result_ensemble_weights = repo.evaluate_ensemble(datasets=[dataset], configs=[config, config], ensemble_size=5, backend="native")[0], - assert repo.evaluate_ensemble(datasets=[dataset], configs=[config, config], ensemble_size=5, backend="native")[0].shape == (2,) + assert repo.evaluate_ensemble(datasets=[dataset], configs=[config, config], ensemble_size=5, backend="native")[0].shape == (2, 4) assert repo.evaluate_ensemble(datasets=[dataset], configs=[config, config], - ensemble_size=5, folds=[2], backend="native")[0].shape == (1,) + ensemble_size=5, folds=[2], backend="native")[0].shape == (1, 4) repo: EvaluationRepository = repo.subset(folds=[2], datasets=[dataset], configs=[config]) assert repo.datasets() == ['abalone'] @@ -106,10 +106,10 @@ def test_repository(): assert repo.predict_val(dataset=dataset, config=config, fold=2).shape == (123, 25) assert repo.predict_test(dataset=dataset, config=config, fold=2).shape == (13, 25) assert repo.dataset_metadata(dataset=dataset) == {'dataset': dataset, 'task_type': 'TaskType.SUPERVISED_CLASSIFICATION'} - assert repo.evaluate_ensemble(datasets=[dataset], configs=[config, config], ensemble_size=5, backend="native")[0].shape == (1,) + assert repo.evaluate_ensemble(datasets=[dataset], configs=[config, config], ensemble_size=5, backend="native")[0].shape == (1, 4) assert repo.evaluate_ensemble(datasets=[dataset], configs=[config, config], - ensemble_size=5, folds=[2], backend="native")[0].shape == (1,) + ensemble_size=5, folds=[2], backend="native")[0].shape == (1, 4) def test_repository_force_to_dense():