Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix scoring bug, properly handling NaN values #780

Merged
merged 2 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ algorithmic_efficiency/workloads/librispeech_conformer/work_dir
*.vocab
wandb/
*.txt
scoring/plots/

!scoring/test_data/experiment_dir/study_0/mnist_jax/trial_0/eval_measurements.csv
!scoring/test_data/experiment_dir/study_0/mnist_jax/trial_1/eval_measurements.csv
21 changes: 17 additions & 4 deletions scoring/performance_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
the dictionary of submissions.
"""
import itertools
import json
import operator
import os
import re
Expand All @@ -45,6 +46,10 @@
BASE_WORKLOADS = workloads_registry.BASE_WORKLOADS
WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)'
BASE_WORKLOADS_DIR = 'algorithmic_efficiency/workloads/'
# Open json file to read heldout workloads
# TODO: This probably shouldn't be hardcoded but passed as an argument.
with open("held_out_workloads_algoperf_v05.json", "r") as f:
HELDOUT_WORKLOADS = json.load(f)
# These global variables have to be set according to the current set of
# workloads and rules for the scoring to be correct.
# We do not use the workload registry since it contains test and development
Expand Down Expand Up @@ -248,6 +253,9 @@ def filter(x):
try:
if x[variant_workload] == np.inf:
return np.inf
# Also check for nan values (e.g. OOMs)
elif np.isnan(x[variant_workload]):
return np.inf
else:
return x[base_workload]
except KeyError as e:
Expand Down Expand Up @@ -306,8 +314,14 @@ def compute_performance_profiles(submissions,
self_tuning_ruleset,
strict))
df = pd.concat(dfs)

# For each held-out workload set to inf if the base workload is inf
# Restrict to base and sampled held-out workloads
# (ignore the additional workload variants of the baseline
# as they cause issues when checking for nans in workload variants).
df = df[BASE_WORKLOADS + HELDOUT_WORKLOADS]
# Sort workloads alphabetically (for better display)
df = df.reindex(sorted(df.columns), axis=1)

# For each held-out workload, set its score to inf if the base workload's score is inf or nan
for workload in df.keys():
if workload not in BASE_WORKLOADS:
# If the base workload does not have a finite score, set the variant score to inf
Expand All @@ -319,14 +333,13 @@ def compute_performance_profiles(submissions,
best_scores = df.min(axis=0)
df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf

# For each held-out workload if variant target was not hit set submission to inf
# For each base workload if variant target was not hit set submission to inf
for workload in df.keys():
if workload not in BASE_WORKLOADS:
# If variants do not have finite score set base_workload score to inf
base_workload = get_base_workload_name(workload)
df[base_workload] = df.apply(
variant_criteria_filter(base_workload, workload), axis=1)

df = df[BASE_WORKLOADS]

if verbosity > 0:
Expand Down
Loading