diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py index 46b2bc3..3305c79 100755 --- a/experiments/run_experiments.py +++ b/experiments/run_experiments.py @@ -172,16 +172,16 @@ def run(batch_size, rt("sparse", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=True, compress="sparse") if run_experiments: - # rexp("fp32", "default", print_header=print_header) - # print_header = False - # rexp("bf16", "codesign", use_half="bfloat16") - # rexp("compile", "codesign", use_half="bfloat16", use_compile="max-autotune") - # rexp("SDPA", "sdpa-decoder", use_half="bfloat16", use_compile="max-autotune") + rexp("fp32", "default", print_header=print_header) + print_header = False + rexp("bf16", "codesign", use_half="bfloat16") + rexp("compile", "codesign", use_half="bfloat16", use_compile="max-autotune") + rexp("SDPA", "sdpa-decoder", use_half="bfloat16", use_compile="max-autotune") rexp("Triton", "local-fork", use_half="bfloat16", use_compile="max-autotune") - # if batch_size > 1: - # rexp("NT", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(batch_size > 1)) - # rexp("int8", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(batch_size > 1), compress="dynamic_quant") - # rexp("sparse", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(batch_size > 1), compress="sparse") + if batch_size > 1: + rexp("NT", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(batch_size > 1)) + rexp("int8", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(batch_size > 1), compress="dynamic_quant") + rexp("sparse", "local-fork", use_half="bfloat16", use_compile="max-autotune", use_nested_tensor=(batch_size > 1), compress="sparse") if __name__ == '__main__':