Major Refactor: Add save/load to dir, code refactor, etc. (#86)
* adding test scripts

* matching tabrepo and fit df, using zeroshot_context

* plotting functionality

* Update

* WIP exec.py

* Add updates

* Add v2 scripts

* Remove y_uncleaned

* resolve merge conflicts

* resolve merge conflicts

* resolve merge conflicts

* adding test scripts

* plotting functionality

* Initial Class implementation

* typo

* minor updates

* add run_scripts_v4

* making run_experiment a staticmethod

* Updated run_experiments

* Cleanup, add TabPFNv2 prototype

* Cleanup

* Cleanup

* Cleanup

* Cleanup

* Cleanup

* bug fix

* Add run_tabpfn_v2_benchmark.py + additional bugfixes

* Add TabForestPFN_class.py

* Add TabForestPFN_class.py

* Delete old files

* Update file locations

* Add AutoGluon_class.py, tabforestpfn_model.py

* add hyperparameter/init_args support

* Add run_tabforestpfn_benchmark.py

* removing unused files

* Update add simulation_artifacts support

* Add simulation ensemble comparison support via `evaluate_ensemble_with_time`

* update

* update

* minor cleanup

* minor cleanup

* Update evaluate_ensemble_with_time

* Fix bug in zeroshot_configs

* Refactor baselines.py

* Add repo.evaluate_ensemble_with_time_multi

* Update repo.evaluate_ensemble to return DataFrame

* Add logger module, and adding wrapper logs to run scripts, will add deeper level logs in next commit

* minor update

* Refactor evaluate_ensemble

* Refactor evaluate_ensemble

* Refactor evaluate_ensemble

* Cleanup

* Cleanup

* Cleanup

* Add logic to context.py

* minor update

* Add save/load logic to ZeroshotSimulatorContext

* Add save/load logic to EvaluationRepository

* Align column names in model fits

* Add unit tests for repo save/load

* Add extra unit tests for repo save/load

* Fix Self import

* Fix imports

* fix tests

* simplify run_quickstart_from_scratch.py

* minor update

* update `repo.from_raw`

* Add root, app and console loggers

* addition to logging module

* add context save/load with json + relative path support

* add ebm and tabpfnv2 models

* add ebm and tabpfnv2 models

* update

* update

* update

* update

* update

* Support loading repo artifact from cloned directory

* minor fix

* cleanup

* update

* Update

* cleanup

* Add simple benchmark runner

* cleanup

* Update for ag12

* Update for ag12

* Update for ag12

* TabPFN support stopped at best epoch

* update

* update 2025

* update 2025

* update 2025

* update 2025

* update 2025

* update 2025

* update 2025

* update 2025

* update 2025

* Add docstrings for evaluate_ensemble and evaluate_ensembles

* Add docstrings, code cleanup

* delete old scripts

* delete old scripts

* update

* remove scripts_v6

* remove scripts_v5

* remove context_dl.py

* update plot_test_vs_val.py

* remove experiment_utils_v6.py

* cleanup

* cleanup

* bug fix

* switch from np.bool8 to np.bool_

* Update scripts/baseline_comparison/baselines.py

Co-authored-by: David Salinas <[email protected]>

* Update tabrepo/simulation/ensemble_selection_config_scorer.py

Co-authored-by: David Salinas <[email protected]>

* Update tabrepo/simulation/ensemble_selection_config_scorer.py

Co-authored-by: David Salinas <[email protected]>

* address comment

---------

Co-authored-by: Ubuntu <[email protected]>
Co-authored-by: Ubuntu <[email protected]>
Co-authored-by: Prateek M Desai <[email protected]>
Co-authored-by: David Salinas <[email protected]>
5 people authored Jan 14, 2025
1 parent ad34eea commit 329deab
Showing 29 changed files with 1,462 additions and 427 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -92,7 +92,7 @@ To evaluate an ensemble of any list of configurations, you can run the following:
```python
from tabrepo import load_repository
repo = load_repository("D244_F3_C1530_30")
print(repo.evaluate_ensemble(datasets=["Australian"], configs=["CatBoost_r22_BAG_L1", "RandomForest_r12_BAG_L1"]))
print(repo.evaluate_ensemble(dataset="Australian", fold=0, configs=["CatBoost_r22_BAG_L1", "RandomForest_r12_BAG_L1"]))
```

this code will return the error of an ensemble whose weights are computed with the Caruana procedure after loading model
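For context on the signature change above (a single `dataset`/`fold` pair instead of a `datasets` list), here is a minimal sketch of how the updated call might be repeated across folds. It reuses only what the README diff shows (`load_repository` and the new `evaluate_ensemble` keyword arguments); the specific fold indices are an assumption for illustration.

```python
from tabrepo import load_repository

repo = load_repository("D244_F3_C1530_30")
configs = ["CatBoost_r22_BAG_L1", "RandomForest_r12_BAG_L1"]

# Assumed for illustration: folds 0, 1 and 2 exist for this dataset in the loaded context.
for fold in [0, 1, 2]:
    result = repo.evaluate_ensemble(dataset="Australian", fold=fold, configs=configs)
    print(result)
```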
4 changes: 2 additions & 2 deletions examples/run_quickstart.py
@@ -61,8 +61,8 @@
y_val = repo.labels_val(dataset=dataset, fold=0)
print(f"Ground Truth Val (dataset={dataset}, fold=0):\n{y_val[:10]}")

df_ranks, df_ensemble_weights = repo.evaluate_ensemble(datasets=[dataset], configs=configs, ensemble_size=100)
print(f"Ensemble rank per task:\n{df_ranks}")
df_result, df_ensemble_weights = repo.evaluate_ensemble(dataset=dataset, fold=0, configs=configs, ensemble_size=100)
print(f"Ensemble result:\n{df_result}")

df_ensemble_weights_mean_sorted = df_ensemble_weights.mean(axis=0).sort_values(ascending=False)
print(f"Top 10 highest mean ensemble weight configs:\n{df_ensemble_weights_mean_sorted.head(10)}")
54 changes: 9 additions & 45 deletions examples/run_quickstart_from_scratch.py
@@ -1,16 +1,9 @@
import pandas as pd

from autogluon.common.savers import save_pd
from autogluon.common.utils.simulation_utils import convert_simulation_artifacts_to_tabular_predictions_dict
from autogluon.tabular import TabularPredictor
from autogluon_benchmark import OpenMLTaskWrapper

from tabrepo import EvaluationRepository
from tabrepo.repository import EvaluationRepositoryZeroshot
from tabrepo.predictions import TabularPredictionsInMemory
from tabrepo.contexts.context import BenchmarkContext, construct_context
from tabrepo.contexts.subcontext import BenchmarkSubcontext
from tabrepo.simulation.ground_truth import GroundTruth


def get_artifacts(task: OpenMLTaskWrapper, fold: int, hyperparameters: dict, dataset: str = None, time_limit=60):
@@ -125,51 +118,22 @@ def convert_leaderboard_to_configs(leaderboard: pd.DataFrame, minimal: bool = Tr
)
)

# TODO: Move into AutoGluonTaskWrapper
simulation_artifacts_full = dict()
leaderboards = []
for simulation_artifacts, leaderboard in artifacts:
leaderboards.append(leaderboard)
results_lst_simulation_artifacts = [simulation_artifacts for simulation_artifacts, leaderboard in artifacts]

leaderboards = [leaderboard for simulation_artifacts, leaderboard in artifacts]
leaderboard_full = pd.concat(leaderboards)
print(leaderboard_full)
for simulation_artifacts, leaderboard in artifacts:
for k in simulation_artifacts.keys():
if k not in simulation_artifacts_full:
simulation_artifacts_full[k] = {}
for f in simulation_artifacts[k]:
if f in simulation_artifacts_full:
raise AssertionError(f"Two results exist for tid {k}, fold {f}!")
else:
simulation_artifacts_full[k][f] = simulation_artifacts[k][f]

zeroshot_pp, zeroshot_gt = convert_simulation_artifacts_to_tabular_predictions_dict(simulation_artifacts=simulation_artifacts_full)

save_loc = "./quickstart/"
save_loc_data_dir = save_loc + "model_predictions/"

predictions = TabularPredictionsInMemory.from_dict(zeroshot_pp)
ground_truth = GroundTruth.from_dict(zeroshot_gt)
predictions.to_data_dir(data_dir=save_loc_data_dir)
ground_truth.to_data_dir(data_dir=save_loc_data_dir)

df_configs = convert_leaderboard_to_configs(leaderboard=leaderboard_full)
save_pd.save(path=f"{save_loc}configs.parquet", df=df_configs)
print(df_configs)

context: BenchmarkContext = construct_context(
name="quickstart",
datasets=datasets,
folds=folds,
local_prefix=save_loc,
local_prefix_is_relative=False,
has_baselines=False)
subcontext = BenchmarkSubcontext(parent=context)
repo = EvaluationRepository.from_raw(df_configs=df_configs, results_lst_simulation_artifacts=results_lst_simulation_artifacts)

# Note: Can also skip all the above code if you want to use a readily available context rather than generating from scratch:
# from tabrepo.contexts import get_subcontext
# subcontext = get_subcontext(name="D244_F3_C1530_30")
# repo = EvaluationRepository.from_context(version="D244_F3_C1530_30", cache=True)

repo.print_info()

repo: EvaluationRepository = subcontext.load_from_parent()
repo: EvaluationRepositoryZeroshot = repo.to_zeroshot()
repo = repo.to_zeroshot()

results_cv = repo.simulate_zeroshot(num_zeroshot=3, n_splits=2, backend="seq")
df_results = repo.generate_output_from_portfolio_cv(portfolio_cv=results_cv, name="quickstart")
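Since the commit title names save/load to a directory as the headline feature, here is a hypothetical sketch of how persisting and reloading the repository built via `from_raw` above might look. The method names `to_dir` and `from_dir` are illustrative assumptions only; of the calls below, just `EvaluationRepository`, `from_raw`, and `print_info` are confirmed by this diff, and `repo` refers to the repository before the `to_zeroshot()` conversion.

```python
from tabrepo import EvaluationRepository

# Hypothetical sketch of the save/load-to-directory feature named in the commit title.
# `to_dir` and `from_dir` are assumed method names, not confirmed by the diff above.
save_dir = "./quickstart_repo/"  # assumed output location
repo.to_dir(save_dir)  # assumed: persist configs, predictions and ground truth to disk
repo_loaded = EvaluationRepository.from_dir(save_dir)  # assumed: reload the repository from disk
repo_loaded.print_info()  # print_info() does appear in the diff above
```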