
Add file size loading support (#112)
* Add file size loading support

* minor update

* linting
Innixma authored Dec 19, 2024
1 parent c1d850f commit 7a32e30
Showing 4 changed files with 66 additions and 13 deletions.
26 changes: 26 additions & 0 deletions src/autogluon/bench/eval/benchmark_context/output_context.py
@@ -96,6 +96,9 @@ def load_results(
             return None
         return results

+    def load_info_file_sizes(self) -> pd.DataFrame:
+        return load_pd.load(self.path_info_file_sizes)
+
     def load_leaderboard(self) -> pd.DataFrame:
         return load_pd.load(self.path_leaderboard)

@@ -240,6 +243,29 @@ def get_model_failures(self) -> Union[pd.DataFrame, None]:
         model_failures_full_df = pd.merge(model_failures_df, results, on="id", how="left")
         return model_failures_full_df

+    def get_info_file_sizes(self, sum: bool = False) -> Union[pd.DataFrame, None]:
+        """
+        Load and return the model info file sizes CSV as a pandas DataFrame if it exists, else return None.
+        Will merge with the results to get additional information, akin to the leaderboard output.
+        """
+        results = self.load_results()
+        results = results[["id", "task", "framework", "constraint", "fold", "type", "result", "metric"]]
+        try:
+            info_file_sizes = self.load_info_file_sizes()
+        except Exception as e:
+            print(f"FAILURE:\n" f"\t{e.__class__.__name__}: {e}")
+            return None
+        else:
+            if sum:
+                total_size = info_file_sizes["size"].sum()
+                results["size"] = total_size
+                return results
+            else:
+                info_file_sizes["id"] = results["id"][0]
+                info_file_sizes_full_df = pd.merge(info_file_sizes, results, on="id", how="left")
+                return info_file_sizes_full_df
+
     def get_logs(self) -> str:
         s3_bucket, s3_prefix = s3_path_to_bucket_prefix(s3_path=self.path_logs)
         s3 = boto3.client("s3", use_ssl=False)
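For orientation, a minimal usage sketch of the new per-run API. The s3 path and the constructor argument are assumptions for illustration, not taken from this commit:

    from autogluon.bench.eval.benchmark_context.output_context import OutputContext

    # Hypothetical output path; OutputContext may require different/additional arguments.
    output_context = OutputContext(path="s3://my-bucket/my-benchmark-run/")
    per_file_df = output_context.get_info_file_sizes()          # one row per artifact file, merged with results
    per_run_df = output_context.get_info_file_sizes(sum=True)   # results row with a single summed "size" column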
20 changes: 20 additions & 0 deletions src/autogluon/bench/eval/benchmark_context/output_suite_context.py
@@ -240,6 +240,26 @@ def load_model_failures(self) -> List[pd.DataFrame]:
             func=OutputContext.get_model_failures, input_list=self.output_contexts, allow_exception=True
         )

+    def load_info_file_sizes(self, sum: bool = False) -> List[pd.DataFrame]:
+        return self._loop_func(
+            func=OutputContext.get_info_file_sizes,
+            input_list=self.output_contexts,
+            kwargs={"sum": sum},
+            allow_exception=True,
+        )
+
+    def aggregate_info_file_sizes(self, sum: bool = False) -> pd.DataFrame:
+        info_file_sizes_lst = self.load_info_file_sizes(sum=sum)
+        none_count = 0
+        for e in info_file_sizes_lst:
+            if e is None:
+                none_count += 1
+        if none_count == len(info_file_sizes_lst):
+            return pd.DataFrame()
+        else:
+            info_file_sizes_df = pd.concat(info_file_sizes_lst, ignore_index=True)
+            return info_file_sizes_df
+
     def aggregate_model_failures(self) -> pd.DataFrame:
         model_failures_list = self.load_model_failures()
         none_count = 0
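And a sketch at the suite level, aggregating across all runs; again, the constructor argument is an illustrative assumption:

    from autogluon.bench.eval.benchmark_context.output_suite_context import OutputSuiteContext

    # Hypothetical suite path; OutputSuiteContext may require different/additional arguments.
    suite = OutputSuiteContext(path="s3://my-bucket/my-benchmark-suite/")
    all_files_df = suite.aggregate_info_file_sizes()          # per-file rows concatenated across runs
    totals_df = suite.aggregate_info_file_sizes(sum=True)     # one summed-size row per run

Note that aggregate_info_file_sizes returns an empty DataFrame when every run failed to load; the none_count guard matters because pd.concat raises if all entries are None, while it silently drops None entries when at least one DataFrame is present.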
7 changes: 4 additions & 3 deletions src/autogluon/bench/eval/evaluation/benchmark_evaluator.py
@@ -163,9 +163,10 @@ def _load_results(
         else:
             assert isinstance(paths, pd.DataFrame)
             results_raw = paths
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore")
-            results_raw.loc[results_raw[TIME_INFER_S] == 0, TIME_INFER_S] = 0.001
+        if TIME_INFER_S in results_raw:
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore")
+                results_raw.loc[results_raw[TIME_INFER_S] == 0, TIME_INFER_S] = 0.001
         if clean_data:
             # FIXME: This doesn't work on new tasks due to not comprehensive metadata
             results_raw = self._clean_data(results_raw=results_raw)
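This change guards the zero-time clamp so it is skipped when the TIME_INFER_S column is absent; `in` on a pandas DataFrame tests column labels. A minimal sketch of the pattern (the divide-by-zero rationale in the comment is an assumption, not stated in the commit):

    import pandas as pd

    df = pd.DataFrame({"time_infer_s": [0.0, 2.5]})
    # `"col" in df` checks column membership, so a missing column is skipped safely.
    if "time_infer_s" in df:
        # Clamp zero timings to 1 ms, presumably so later speed ratios never divide by zero.
        df.loc[df["time_infer_s"] == 0, "time_infer_s"] = 0.001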
26 changes: 16 additions & 10 deletions tests/unittests/cloud/aws/test_stack.py
@@ -19,11 +19,14 @@ def test_static_resource_stack_without_vpc():
     for key, value in context_values.items():
         app.node.set_context(key, value)

-    with patch.object(
-        StaticResourceStack,
-        "create_s3_resources",
-        return_value=s3.Bucket(Stack(app, "TestBucketStack"), "DummyBucket"),
-    ) as mock_s3_resources, patch.dict(os.environ, {"CDK_DEPLOY_REGION": "dummy_region"}):
+    with (
+        patch.object(
+            StaticResourceStack,
+            "create_s3_resources",
+            return_value=s3.Bucket(Stack(app, "TestBucketStack"), "DummyBucket"),
+        ) as mock_s3_resources,
+        patch.dict(os.environ, {"CDK_DEPLOY_REGION": "dummy_region"}),
+    ):
         stack = StaticResourceStack(app, "TestStaticResourceStack", env=env)

     mock_s3_resources.assert_called_once()
@@ -37,11 +40,14 @@ def test_static_resource_stack_with_vpc():
         app.node.set_context(key, value)
     app.node.set_context("VPC_NAME", "ProvidedVpcName")

-    with patch.object(
-        StaticResourceStack,
-        "create_s3_resources",
-        return_value=s3.Bucket(Stack(app, "TestBucketStack"), "DummyBucket"),
-    ) as mock_s3_resources, patch.dict(os.environ, {"CDK_DEPLOY_REGION": "dummy_region"}):
+    with (
+        patch.object(
+            StaticResourceStack,
+            "create_s3_resources",
+            return_value=s3.Bucket(Stack(app, "TestBucketStack"), "DummyBucket"),
+        ) as mock_s3_resources,
+        patch.dict(os.environ, {"CDK_DEPLOY_REGION": "dummy_region"}),
+    ):
         stack = StaticResourceStack(app, "TestStaticResourceStack", env=env)

     mock_s3_resources.assert_called_once()
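The linting pass rewrites the long `with` statements into the parenthesized multiple-context-manager form, which Python officially supports from 3.10 onward. A self-contained sketch of the same pattern; the patched targets here are toy examples, not from the repository:

    import os
    from unittest.mock import patch

    # Parenthesized context managers let formatters break long `with` lines cleanly.
    with (
        patch.dict(os.environ, {"CDK_DEPLOY_REGION": "dummy_region"}),
        patch("builtins.print") as mock_print,
    ):
        print(os.environ["CDK_DEPLOY_REGION"])

    mock_print.assert_called_once_with("dummy_region")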
