Support running only 1 time to get gpu_time for the speed task. #1417

Open: wants to merge 2 commits into base: master
25 changes: 19 additions & 6 deletions api/common/benchmark.py
@@ -122,7 +122,12 @@ def layers(self, api_name, module_name=None, **kwargs):
def append_gradients(self, targets, inputs):
pass

def get_running_stats(self, use_gpu, config, runtimes, walltimes=None):
def get_running_stats(self,
use_gpu,
config,
runtimes,
walltimes=None,
repeat=None):
try:
module_name = "torch" if self._framework == "pytorch" else self._framework
module = importlib.import_module(module_name)
@@ -143,9 +148,17 @@ def get_running_stats(self, use_gpu, config, runtimes, walltimes=None):
if walltimes is not None:
stats["wall_time"] = walltimes

flop, byte = self.compute_flop_and_byte(config)
if flop is not None:
stats["flop"] = flop
if byte is not None:
stats["byte"] = byte
if repeat is not None:
stats["repeat"] = repeat

try:
flop, byte = self.compute_flop_and_byte(config)
if flop is not None:
stats["flop"] = flop
if byte is not None:
stats["byte"] = byte
except Exception:
print("Failed to call compute_flops_and_byte for %s." %
(self._framework))

return stats
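
In short, get_running_stats now also takes a repeat count so a stats dict can be produced even when no per-iteration runtimes were collected, and the FLOP/byte computation is wrapped so a failure is logged instead of aborting the run. Recording repeat is what later lets the reporter average a user-supplied gpu_time without measured runtimes. A minimal sketch of the resulting behavior, written as a free function for illustration (the name build_stats and the standalone form are not the repo's API):

```python
# Illustrative sketch only: mirrors the logic added to get_running_stats,
# but as a free function so it can be run on its own.
def build_stats(runtimes=None, walltimes=None, repeat=None,
                compute_flop_and_byte=None, config=None):
    stats = {}
    if runtimes is not None:
        stats["total"] = runtimes
    if walltimes is not None:
        stats["wall_time"] = walltimes
    if repeat is not None:
        # recorded so the reporter can average gpu_time without runtimes
        stats["repeat"] = repeat
    try:
        # FLOPs/bytes are best-effort extras; a failure is logged, not raised
        if compute_flop_and_byte is not None:
            flop, byte = compute_flop_and_byte(config)
            if flop is not None:
                stats["flop"] = flop
            if byte is not None:
                stats["byte"] = byte
    except Exception:
        print("Failed to call compute_flop_and_byte.")
    return stats


# Run-once case: no runtimes, only the repeat count is recorded.
print(build_stats(repeat=1000))  # {'repeat': 1000}
```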
2 changes: 2 additions & 0 deletions api/common/launch.py
@@ -474,6 +474,8 @@ def _set_args(args, arg, value):
if task == "speed":
args.benchmark_script_args.append(" --gpu_time ")
args.benchmark_script_args.append(str(output_time))
_set_args(args.benchmark_script_args,
"--get_status_without_running", "True")
if task == "scheduling":
args.benchmark_script_args.append(" --scheduling_times ")
args.benchmark_script_args.append("\"" + str(output_time) + "\"")
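
The launcher change means a speed task now also forwards --get_status_without_running True to the benchmark script. _set_args itself is not shown in this diff, so the sketch below only emulates the intended end state of benchmark_script_args with a plain append; every argument other than the two flags is made up for the example:

```python
# Hypothetical illustration of the argument list handed to the benchmark script
# for a speed task; only --gpu_time and --get_status_without_running come from
# the diff, the rest is invented for the example.
benchmark_script_args = ["--api_name", "abs", "--repeat", "1000"]
task, output_time = "speed", 12.5

if task == "speed":
    benchmark_script_args.append(" --gpu_time ")
    benchmark_script_args.append(str(output_time))
    # stand-in for _set_args(args.benchmark_script_args,
    #                        "--get_status_without_running", "True")
    benchmark_script_args.extend(["--get_status_without_running", "True"])

print(" ".join(benchmark_script_args))
```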
9 changes: 7 additions & 2 deletions api/common/main.py
@@ -135,8 +135,13 @@ def parse_args():
"task should be paddle, tensorflow, tf, pytorch, torch, both")

if args.get_status_without_running:
assert args.task == "scheduling", "task must be 'scheduling' if get_status_without_running is True."
assert args.scheduling_times != "{}", "scheduling_times can't be {} if task is 'scheduling' and get_status_without_running is True."
assert args.task in [
"speed", "scheduling"
], "task must be 'speed' or 'scheduling' if get_status_without_running is True."
if args.task == "speed":
assert args.gpu_time != 0, "gpu_time can't be 0 if task is 'speed' and get_status_without_running is True."
if args.task == "scheduling":
assert args.scheduling_times != "{}", "scheduling_times can't be {} if task is 'scheduling' and get_status_without_running is True."

if args.task == "accuracy":
args.repeat = 1
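
Restated on its own, the relaxed check accepts both speed and scheduling and validates the task-specific input that substitutes for an actual run. A self-contained restatement (Args is a stand-in for the argparse namespace in api/common/main.py):

```python
# Self-contained restatement of the new argument checks; `Args` mimics the
# argparse namespace used in api/common/main.py.
class Args:
    task = "speed"
    get_status_without_running = True
    gpu_time = 12.5            # must be non-zero for the speed task
    scheduling_times = "{}"    # must be non-empty for the scheduling task


def check_get_status_flags(args):
    if args.get_status_without_running:
        assert args.task in ["speed", "scheduling"], (
            "task must be 'speed' or 'scheduling' if "
            "get_status_without_running is True.")
        if args.task == "speed":
            assert args.gpu_time != 0, (
                "gpu_time can't be 0 if task is 'speed' and "
                "get_status_without_running is True.")
        if args.task == "scheduling":
            assert args.scheduling_times != "{}", (
                "scheduling_times can't be {} if task is 'scheduling' and "
                "get_status_without_running is True.")


check_get_status_flags(Args())   # passes for the speed example above
```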
3 changes: 2 additions & 1 deletion api/common/paddle_op_benchmark.py
@@ -442,7 +442,8 @@ def _run_main_iter(step=1):
# "_run_main_iter" needs to be executed firstly because
# parameter "self._backward" needs to be update.
if get_status_without_running:
stats = self.get_running_stats(use_gpu, config, None)
stats = self.get_running_stats(
use_gpu, config, runtimes=None, repeat=repeat)
return None, stats

runtimes = []
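
On the runner side this is an early return: with get_status_without_running set, stats (now carrying repeat) are built from the config alone and no timed iteration executes. A simplified, hypothetical runner showing the two paths (run_one_op and fake_get_running_stats are invented names; only the call shape of get_running_stats comes from the diff):

```python
import time


def fake_get_running_stats(use_gpu, config, runtimes, repeat=None):
    # tiny stand-in for get_running_stats in api/common/benchmark.py
    stats = {"device": "GPU" if use_gpu else "CPU"}
    if runtimes is not None:
        stats["total"] = runtimes
    if repeat is not None:
        stats["repeat"] = repeat
    return stats


def run_one_op(use_gpu, config, repeat, get_status_without_running=False):
    if get_status_without_running:
        # run-once mode: return stats built without any timed iterations
        stats = fake_get_running_stats(
            use_gpu, config, runtimes=None, repeat=repeat)
        return None, stats

    runtimes = []
    for _ in range(repeat):                    # the normal timing loop
        start = time.time()
        sum(range(10000))                      # stand-in for one op iteration
        runtimes.append((time.time() - start) * 1000.0)
    stats = fake_get_running_stats(use_gpu, config, runtimes)
    return "outputs", stats


print(run_one_op(True, config=None, repeat=1000,
                 get_status_without_running=True)[1])
# {'device': 'GPU', 'repeat': 1000}
```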
133 changes: 75 additions & 58 deletions api/common/utils.py
@@ -270,33 +270,27 @@ def check_outputs(output_list,
sys.exit(1)


def print_benchmark_result(result,
task="speed",
log_level=0,
config_params=None):
assert isinstance(result, dict), "Input result should be a dict."
def _print_runtime(log_level, runtimes, walltimes):
if runtimes is None:
return

status = collections.OrderedDict()
status["framework"] = result["framework"]
status["version"] = result["version"]
status["name"] = result["name"]
status["device"] = result["device"]
status["backward"] = result["backward"]
# print all times
repeat = len(runtimes)
seg_range = [0, 0]
if log_level == 0:
seg_range = [0, repeat]
elif log_level == 1 and repeat > 20:
seg_range = [10, repeat - 10]
for i in range(repeat):
if i < seg_range[0] or i >= seg_range[1]:
walltime = walltimes[i] if walltimes is not None else 0
print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
(i, runtimes[i], walltime))

scheduling_times = result.get("scheduling_times", "{}")
if task == "scheduling" and scheduling_times is not None:
status["scheduling"] = eval(scheduling_times)

runtimes = result.get("total", None)
def _compute_average_runtime(runtimes, walltimes):
if runtimes is None:
status["parameters"] = config_params
print(json.dumps(status))
return

walltimes = result.get("wall_time", None)
gpu_time = result.get("gpu_time", None)
stable = result.get("stable", None)
diff = result.get("diff", None)
return 0, 0, 0, 0

repeat = len(runtimes)
for i in range(repeat):
@@ -320,47 +314,70 @@ def print_benchmark_result(result,
avg_walltime = np.average(np.sort(walltimes)[begin:end])
else:
avg_walltime = 0
return begin, end, avg_runtime, avg_walltime

# print all times
seg_range = [0, 0]
if log_level == 0:
seg_range = [0, repeat]
elif log_level == 1 and repeat > 20:
seg_range = [10, repeat - 10]
for i in range(len(runtimes)):
if i < seg_range[0] or i >= seg_range[1]:
walltime = walltimes[i] if walltimes is not None else 0
print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
(i, runtimes[i], walltime))

if avg_runtime - avg_walltime > 0.001:
total = avg_runtime - avg_walltime
else:
print(
"Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
% (avg_runtime, avg_walltime))
total = 0.001
def print_benchmark_result(result,
task="speed",
log_level=0,
config_params=None):
assert isinstance(result, dict), "Input result should be a dict."

status = collections.OrderedDict()
status["framework"] = result["framework"]
status["version"] = result["version"]
status["name"] = result["name"]
status["device"] = result["device"]
status["backward"] = result["backward"]

scheduling_times = result.get("scheduling_times", "{}")
if task == "scheduling" and scheduling_times is not None:
status["scheduling"] = eval(scheduling_times)
status["parameters"] = config_params
print(json.dumps(status))
return

stable = result.get("stable", None)
diff = result.get("diff", None)
if stable is not None and diff is not None:
status["precision"] = collections.OrderedDict()
status["precision"]["stable"] = stable
status["precision"]["diff"] = diff
status["speed"] = collections.OrderedDict()
status["speed"]["repeat"] = repeat
status["speed"]["begin"] = begin
status["speed"]["end"] = end
status["speed"]["total"] = total
status["speed"]["wall_time"] = avg_walltime
status["speed"]["total_include_wall_time"] = avg_runtime
if gpu_time is not None:
avg_gpu_time = gpu_time / repeat
status["speed"]["gpu_time"] = avg_gpu_time

flop = result.get("flop", None)
byte = result.get("byte", None)
if flop is not None and abs(avg_gpu_time) > 1E-6:
status["speed"]["gflops"] = float(flop) * 1E-6 / avg_gpu_time
if byte is not None and abs(avg_gpu_time) > 1E-6:
status["speed"]["gbs"] = float(byte) * 1E-6 / avg_gpu_time

if task == "speed":
runtimes = result.get("total", None)
walltimes = result.get("wall_time", None)
gpu_time = result.get("gpu_time", None)

repeat = len(runtimes) if runtimes is not None else result.get(
"repeat", 1)
begin, end, avg_runtime, avg_walltime = _compute_average_runtime(
runtimes, walltimes)
_print_runtime(log_level, runtimes, walltimes)
if avg_runtime - avg_walltime > 0.001:
total = avg_runtime - avg_walltime
else:
print(
"Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
% (avg_runtime, avg_walltime))
total = 0.001

status["speed"] = collections.OrderedDict()
status["speed"]["repeat"] = repeat
status["speed"]["begin"] = begin
status["speed"]["end"] = end
status["speed"]["total"] = total
status["speed"]["wall_time"] = avg_walltime
status["speed"]["total_include_wall_time"] = avg_runtime
if gpu_time is not None:
avg_gpu_time = gpu_time / repeat
status["speed"]["gpu_time"] = avg_gpu_time

flop = result.get("flop", None)
byte = result.get("byte", None)
if flop is not None and abs(avg_gpu_time) > 1E-6:
status["speed"]["gflops"] = float(flop) * 1E-6 / avg_gpu_time
if byte is not None and abs(avg_gpu_time) > 1E-6:
status["speed"]["gbs"] = float(byte) * 1E-6 / avg_gpu_time
status["parameters"] = config_params
print(json.dumps(status))
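
For the run-once case the reporter has no runtimes list to count, so repeat falls back to result["repeat"] and the speed numbers are derived from gpu_time alone. A condensed, hypothetical walk-through of that path (the result values are made up; field names and formulas follow the diff):

```python
import collections
import json

# Made-up result dict for the run-once case: no "total"/"wall_time" lists,
# only an accumulated gpu_time plus the repeat count stored by get_running_stats.
result = {
    "gpu_time": 12.5,     # ms, summed over all repeats
    "repeat": 5,
    "flop": 2.0e9,
    "byte": 1.6e9,
}

status = collections.OrderedDict()
runtimes = result.get("total", None)
repeat = len(runtimes) if runtimes is not None else result.get("repeat", 1)

status["speed"] = collections.OrderedDict()
status["speed"]["repeat"] = repeat

gpu_time = result.get("gpu_time", None)
if gpu_time is not None:
    avg_gpu_time = gpu_time / repeat                      # 2.5 ms per run
    status["speed"]["gpu_time"] = avg_gpu_time
    if result.get("flop") is not None and abs(avg_gpu_time) > 1E-6:
        # flop * 1E-6 / ms  ==  GFLOP/s
        status["speed"]["gflops"] = float(result["flop"]) * 1E-6 / avg_gpu_time
    if result.get("byte") is not None and abs(avg_gpu_time) > 1E-6:
        status["speed"]["gbs"] = float(result["byte"]) * 1E-6 / avg_gpu_time

print(json.dumps(status))
# {"speed": {"repeat": 5, "gpu_time": 2.5, "gflops": 800.0, "gbs": 640.0}}
```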