Support running only one time to get gpu_time, for the speed task only.
Xreki committed Mar 23, 2022
1 parent c65f4be commit 977b398
Showing 5 changed files with 105 additions and 68 deletions.
api/common/benchmark.py (25 changes: 19 additions & 6 deletions)
@@ -52,7 +52,12 @@ def layers(self, api_name, module_name=None, **kwargs):
def append_gradients(self, targets, inputs):
pass

def get_running_stats(self, use_gpu, config, runtimes, walltimes=None):
def get_running_stats(self,
use_gpu,
config,
runtimes,
walltimes=None,
repeat=None):
try:
module_name = "torch" if self._framework == "pytorch" else self._framework
module = importlib.import_module(module_name)
@@ -73,9 +78,17 @@ def get_running_stats(self, use_gpu, config, runtimes, walltimes=None):
if walltimes is not None:
stats["wall_time"] = walltimes

flop, byte = self.compute_flop_and_byte(config)
if flop is not None:
stats["flop"] = flop
if byte is not None:
stats["byte"] = byte
if repeat is not None:
stats["repeat"] = repeat

try:
flop, byte = self.compute_flop_and_byte(config)
if flop is not None:
stats["flop"] = flop
if byte is not None:
stats["byte"] = byte
except Exception:
print("Failed to call compute_flops_and_byte for %s." %
(self._framework))

return stats
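
For readability, here is a minimal standalone sketch of the stats assembly this hunk describes. It is not the repository's get_running_stats method; the helper name and the injected compute_flop_and_byte callable are assumptions made for the example.

import collections


def build_running_stats(runtimes, walltimes=None, repeat=None,
                        compute_flop_and_byte=None):
    """Hypothetical helper mirroring the diff above: record whatever timing
    information is available, and never fail the run on flop/byte errors."""
    stats = collections.OrderedDict()
    if runtimes is not None:
        stats["total"] = runtimes
    if walltimes is not None:
        stats["wall_time"] = walltimes
    # New in this commit: a bare repeat count is enough when the op is run
    # only once to collect gpu_time.
    if repeat is not None:
        stats["repeat"] = repeat
    try:
        if compute_flop_and_byte is not None:
            flop, byte = compute_flop_and_byte()
            if flop is not None:
                stats["flop"] = flop
            if byte is not None:
                stats["byte"] = byte
    except Exception:
        print("Failed to compute flop and byte.")
    return stats


# Single-run speed mode: no per-iteration runtimes, only the repeat count.
print(build_running_stats(runtimes=None, repeat=1))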
api/common/launch.py (2 changes: 2 additions & 0 deletions)
@@ -458,6 +458,8 @@ def _set_args(args, arg, value):
if task == "speed":
args.benchmark_script_args.append(" --gpu_time ")
args.benchmark_script_args.append(str(output_time))
_set_args(args.benchmark_script_args,
"--get_status_without_running", "True")
if task == "scheduling":
args.benchmark_script_args.append(" --scheduling_times ")
args.benchmark_script_args.append("\"" + str(output_time) + "\"")
api/common/main.py (9 changes: 7 additions & 2 deletions)
@@ -135,8 +135,13 @@ def parse_args():
"task should be paddle, tensorflow, tf, pytorch, torch, both")

if args.get_status_without_running:
assert args.task == "scheduling", "task must be 'scheduling' if get_status_without_running is True."
assert args.scheduling_times != "{}", "scheduling_times can't be {} if task is 'scheduling' and get_status_without_running is True."
assert args.task in [
"speed", "scheduling"
], "task must be 'speed' or 'scheduling' if get_status_without_running is True."
if args.task == "speed":
assert args.gpu_time != 0, "gpu_time can't be 0 if task is 'speed' and get_status_without_running is True."
if args.task == "scheduling":
assert args.scheduling_times != "{}", "scheduling_times can't be {} if task is 'scheduling' and get_status_without_running is True."

if args.task == "accuracy":
args.repeat = 1
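
The constraints above amount to a small argument-validation step. The following self-contained sketch reproduces them with argparse; only the option names and the checks come from the diff, while the types, defaults, and the driver call at the bottom are illustrative assumptions.

import argparse


def parse_and_check(argv=None):
    # Hypothetical parser: option names and validation rules follow the diff,
    # everything else is illustrative.
    parser = argparse.ArgumentParser()
    parser.add_argument("--task", type=str, default="speed")
    parser.add_argument(
        "--get_status_without_running",
        type=lambda s: s.lower() in ("true", "1"),
        default=False)
    parser.add_argument("--gpu_time", type=float, default=0)
    parser.add_argument("--scheduling_times", type=str, default="{}")
    args = parser.parse_args(argv)

    if args.get_status_without_running:
        assert args.task in ["speed", "scheduling"], (
            "task must be 'speed' or 'scheduling' if "
            "get_status_without_running is True.")
        if args.task == "speed":
            assert args.gpu_time != 0, (
                "gpu_time can't be 0 if task is 'speed' and "
                "get_status_without_running is True.")
        if args.task == "scheduling":
            assert args.scheduling_times != "{}", (
                "scheduling_times can't be {} if task is 'scheduling' and "
                "get_status_without_running is True.")
    return args


# For a speed task the launcher passes "--get_status_without_running True"
# together with the measured "--gpu_time" value.
args = parse_and_check(
    ["--task", "speed", "--get_status_without_running", "True",
     "--gpu_time", "12.5"])
print(args.task, args.gpu_time)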
api/common/paddle_op_benchmark.py (3 changes: 2 additions & 1 deletion)
@@ -442,7 +442,8 @@ def _run_main_iter(step=1):
# "_run_main_iter" needs to be executed firstly because
# parameter "self._backward" needs to be update.
if get_status_without_running:
stats = self.get_running_stats(use_gpu, config, None)
stats = self.get_running_stats(
use_gpu, config, runtimes=None, repeat=repeat)
return None, stats

runtimes = []
api/common/utils.py (134 changes: 75 additions & 59 deletions)
@@ -270,33 +270,27 @@ def check_outputs(output_list,
sys.exit(1)


def print_benchmark_result(result,
task="speed",
log_level=0,
config_params=None):
assert isinstance(result, dict), "Input result should be a dict."
def _print_runtime(log_level, runtimes, walltimes):
if runtimes is None:
return

status = collections.OrderedDict()
status["framework"] = result["framework"]
status["version"] = result["version"]
status["name"] = result["name"]
status["device"] = result["device"]
status["backward"] = result["backward"]
# print all times
repeat = len(runtimes)
seg_range = [0, 0]
if log_level == 0:
seg_range = [0, repeat]
elif log_level == 1 and repeat > 20:
seg_range = [10, repeat - 10]
for i in range(repeat):
if i < seg_range[0] or i >= seg_range[1]:
walltime = walltimes[i] if walltimes is not None else 0
print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
(i, runtimes[i], walltime))

scheduling_times = result.get("scheduling_times", "{}")
if task == "scheduling" and scheduling_times is not None:
status["scheduling"] = eval(scheduling_times)

runtimes = result.get("total", None)
def _compute_average_runtime(runtimes, walltimes):
if runtimes is None:
status["parameters"] = config_params
print(json.dumps(status))
return

walltimes = result.get("wall_time", None)
gpu_time = result.get("gpu_time", None)
stable = result.get("stable", None)
diff = result.get("diff", None)
return 0, 0, 0, 0

repeat = len(runtimes)
for i in range(repeat):
@@ -320,47 +314,69 @@ def print_benchmark_result(result,
avg_walltime = np.average(np.sort(walltimes)[begin:end])
else:
avg_walltime = 0
return begin, end, avg_runtime, avg_walltime

# print all times
seg_range = [0, 0]
if log_level == 0:
seg_range = [0, repeat]
elif log_level == 1 and repeat > 20:
seg_range = [10, repeat - 10]
for i in range(len(runtimes)):
if i < seg_range[0] or i >= seg_range[1]:
walltime = walltimes[i] if walltimes is not None else 0
print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
(i, runtimes[i], walltime))

if avg_runtime - avg_walltime > 0.001:
total = avg_runtime - avg_walltime
else:
print(
"Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
% (avg_runtime, avg_walltime))
total = 0.001
def print_benchmark_result(result,
task="speed",
log_level=0,
config_params=None):
assert isinstance(result, dict), "Input result should be a dict."

status = collections.OrderedDict()
status["framework"] = result["framework"]
status["version"] = result["version"]
status["name"] = result["name"]
status["device"] = result["device"]
status["backward"] = result["backward"]
status["parameters"] = config_params

scheduling_times = result.get("scheduling_times", "{}")
if task == "scheduling" and scheduling_times is not None:
status["scheduling"] = eval(scheduling_times)
print(json.dumps(status))
return

stable = result.get("stable", None)
diff = result.get("diff", None)
if stable is not None and diff is not None:
status["precision"] = collections.OrderedDict()
status["precision"]["stable"] = stable
status["precision"]["diff"] = diff
status["speed"] = collections.OrderedDict()
status["speed"]["repeat"] = repeat
status["speed"]["begin"] = begin
status["speed"]["end"] = end
status["speed"]["total"] = total
status["speed"]["wall_time"] = avg_walltime
status["speed"]["total_include_wall_time"] = avg_runtime
if gpu_time is not None:
avg_gpu_time = gpu_time / repeat
status["speed"]["gpu_time"] = avg_gpu_time

flop = result.get("flop", None)
byte = result.get("byte", None)
if flop is not None and abs(avg_gpu_time) > 1E-6:
status["speed"]["gflops"] = float(flop) * 1E-6 / avg_gpu_time
if byte is not None and abs(avg_gpu_time) > 1E-6:
status["speed"]["gbs"] = float(byte) * 1E-6 / avg_gpu_time
status["parameters"] = config_params

if task == "speed":
runtimes = result.get("total", None)
walltimes = result.get("wall_time", None)
gpu_time = result.get("gpu_time", None)

repeat = len(runtimes) if runtimes is not None else result.get(
"repeat", 1)
begin, end, avg_runtime, avg_walltime = _compute_average_runtime(
runtimes, walltimes)
_print_runtime(log_level, runtimes, walltimes)
if avg_runtime - avg_walltime > 0.001:
total = avg_runtime - avg_walltime
else:
print(
"Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
% (avg_runtime, avg_walltime))
total = 0.001

status["speed"] = collections.OrderedDict()
status["speed"]["repeat"] = repeat
status["speed"]["begin"] = begin
status["speed"]["end"] = end
status["speed"]["total"] = total
status["speed"]["wall_time"] = avg_walltime
status["speed"]["total_include_wall_time"] = avg_runtime
if gpu_time is not None:
avg_gpu_time = gpu_time / repeat
status["speed"]["gpu_time"] = avg_gpu_time

flop = result.get("flop", None)
byte = result.get("byte", None)
if flop is not None and abs(avg_gpu_time) > 1E-6:
status["speed"]["gflops"] = float(flop) * 1E-6 / avg_gpu_time
if byte is not None and abs(avg_gpu_time) > 1E-6:
status["speed"]["gbs"] = float(byte) * 1E-6 / avg_gpu_time
print(json.dumps(status))
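
As a hedged usage sketch of the reworked reporting path: a "speed" result produced by a single profiled run carries no per-iteration "total" list, only "gpu_time" and "repeat". The dictionary keys below are the ones read in the diff above; the import path and the concrete values are assumptions.

# Hypothetical driver; assumes the api/common directory is on sys.path so
# the patched module can be imported as utils.
import utils

result = {
    "framework": "paddle",   # illustrative metadata values
    "version": "0.0.0",
    "name": "abs",
    "device": "GPU",
    "backward": False,
    "gpu_time": 12.5,        # accumulated GPU kernel time in ms
    "repeat": 1,             # the op was profiled only once
}

# With "total" absent, repeat falls back to result["repeat"] and the reported
# speed.gpu_time is gpu_time / repeat = 12.5 ms.
utils.print_benchmark_result(result, task="speed", log_level=0)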
