Skip to content

Commit

Permalink
Remove process_latencies from ConstructSUT (mlcommons#1152)
Browse files Browse the repository at this point in the history
  • Loading branch information
pgmpablo157321 authored May 23, 2022
1 parent e0223ff commit fb79019
Show file tree
Hide file tree
Showing 20 changed files with 24 additions and 156 deletions.
9 changes: 2 additions & 7 deletions compliance/nvidia/TEST04-B/main_vision_test04b_war.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def run_one_item(self, qitem):
processed_results.extend(self.post_process(results, qitem.content_id, qitem.label, self.result_dict))
if self.take_accuracy:
self.post_process.add_results(processed_results)
self.result_timing.append(time.time() - qitem.start)
self.result_timing.append(time.time() - qitem.start)
except Exception as ex: # pylint: disable=broad-except
src = [self.ds.get_item_loc(i) for i in qitem.content_id]
log.error("thread: failed on contentid=%s, %s", src, ex)
Expand Down Expand Up @@ -482,11 +482,6 @@ def issue_queries(query_samples):
def flush_queries():
pass

def process_latencies(latencies_ns):
# called by loadgen to show us the recorded latencies
global last_timeing
last_timeing = [t / NANO_SEC for t in latencies_ns]

settings = lg.TestSettings()
settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
settings.FromConfig(user_conf, args.model_name, args.scenario)
Expand Down Expand Up @@ -517,7 +512,7 @@ def process_latencies(latencies_ns):
settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
settings.multi_stream_expected_latency_ns = int(args.max_latency * NANO_SEC)

sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
sut = lg.ConstructSUT(issue_queries, flush_queries)
qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples, ds.unload_query_samples)

log.info("starting {}".format(scenario))
Expand Down
5 changes: 1 addition & 4 deletions language/bert/onnxruntime_SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, args):
self.sess = onnxruntime.InferenceSession(model_path, self.options)

print("Constructing SUT...")
self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries)
print("Finished constructing SUT.")

self.qsl = get_squad_QSL(args.max_examples)
Expand Down Expand Up @@ -72,9 +72,6 @@ def issue_queries(self, query_samples):
def flush_queries(self):
pass

def process_latencies(self, latencies_ns):
pass

def __del__(self):
if self.profile:
print("ONNX runtime profile dumped to: '{}'".format(self.sess.end_profiling()))
Expand Down
5 changes: 1 addition & 4 deletions language/bert/pytorch_SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(self, args):
self.model.load_state_dict(torch.load("build/data/bert_tf_v1_1_large_fp32_384_v2/model.pytorch"), strict=True)

print("Constructing SUT...")
self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries)
print("Finished constructing SUT.")

self.qsl = get_squad_QSL(args.max_examples)
Expand All @@ -85,9 +85,6 @@ def issue_queries(self, query_samples):
def flush_queries(self):
pass

def process_latencies(self, latencies_ns):
pass

def __del__(self):
print("Finished destroying SUT.")

Expand Down
5 changes: 1 addition & 4 deletions language/bert/tf_SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __init__(self, args):
tf.import_graph_def(graph_def, name='')

print("Constructing SUT...")
self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries)
print("Finished constructing SUT.")

self.qsl = get_squad_QSL(args.max_examples)
Expand All @@ -72,9 +72,6 @@ def issue_queries(self, query_samples):
def flush_queries(self):
pass

def process_latencies(self, latencies_ns):
pass

def __del__(self):
print("Finished destroying SUT.")

Expand Down
5 changes: 1 addition & 4 deletions language/bert/tf_estimator_SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __init__(self, batch_size=8):
self.batch_size = batch_size

print("Constructing SUT...")
self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries)
print("Finished constructing SUT.")

self.qsl = get_squad_QSL()
Expand Down Expand Up @@ -78,9 +78,6 @@ def input_fn():
def flush_queries(self):
pass

def process_latencies(self, latencies_ns):
pass

def __del__(self):
print("Finished destroying SUT.")

Expand Down
15 changes: 1 addition & 14 deletions loadgen/demos/py_demo_multi_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

from absl import app
import mlperf_loadgen
import numpy

from datetime import datetime

Expand Down Expand Up @@ -61,17 +60,6 @@ def flush_queries():
pass


def process_latencies(latencies_ns):
print("Average latency: ")
print(numpy.mean(latencies_ns))
print("Median latency: ")
print(numpy.percentile(latencies_ns, 50))
print("90 percentile latency: ")
print(numpy.percentile(latencies_ns, 90))
print("99 percentile latency: ")
print(numpy.percentile(latencies_ns, 99))


def main(argv):
del argv
settings = mlperf_loadgen.TestSettings()
Expand All @@ -82,8 +70,7 @@ def main(argv):
settings.min_query_count = 100
settings.min_duration_ms = 10000

sut = mlperf_loadgen.ConstructSUT(
issue_query, flush_queries, process_latencies)
sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries)
qsl = mlperf_loadgen.ConstructQSL(
1024, 128, load_samples_to_ram, unload_samples_from_ram)
mlperf_loadgen.StartTest(sut, qsl, settings)
Expand Down
13 changes: 1 addition & 12 deletions loadgen/demos/py_demo_offline.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

from absl import app
import mlperf_loadgen
import numpy


def load_samples_to_ram(query_samples):
Expand Down Expand Up @@ -59,24 +58,14 @@ def flush_queries():
pass


def process_latencies(latencies_ns):
print("Average latency: ")
print(numpy.mean(latencies_ns))
print("Median latency: ")
print(numpy.percentile(latencies_ns, 50))
print("90 percentile latency: ")
print(numpy.percentile(latencies_ns, 90))


def main(argv):
del argv
settings = mlperf_loadgen.TestSettings()
settings.scenario = mlperf_loadgen.TestScenario.Offline
settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
settings.offline_expected_qps = 1000

sut = mlperf_loadgen.ConstructSUT(
issue_query, flush_queries, process_latencies)
sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries)
qsl = mlperf_loadgen.ConstructQSL(
1024, 128, load_samples_to_ram, unload_samples_from_ram)
mlperf_loadgen.StartTest(sut, qsl, settings)
Expand Down
13 changes: 1 addition & 12 deletions loadgen/demos/py_demo_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

from absl import app
import mlperf_loadgen
import numpy


def load_samples_to_ram(query_samples):
Expand Down Expand Up @@ -53,15 +52,6 @@ def flush_queries():
pass


def process_latencies(latencies_ns):
print("Average latency: ")
print(numpy.mean(latencies_ns))
print("Median latency: ")
print(numpy.percentile(latencies_ns, 50))
print("99 percentile latency: ")
print(numpy.percentile(latencies_ns, 99))


def main(argv):
del argv
settings = mlperf_loadgen.TestSettings()
Expand All @@ -72,8 +62,7 @@ def main(argv):
settings.min_query_count = 100
settings.min_duration_ms = 10000

sut = mlperf_loadgen.ConstructSUT(
issue_query, flush_queries, process_latencies)
sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries)
qsl = mlperf_loadgen.ConstructQSL(
1024, 128, load_samples_to_ram, unload_samples_from_ram)
mlperf_loadgen.StartTest(sut, qsl, settings)
Expand Down
13 changes: 1 addition & 12 deletions loadgen/demos/py_demo_single_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

from absl import app
import mlperf_loadgen
import numpy


def load_samples_to_ram(query_samples):
Expand Down Expand Up @@ -62,15 +61,6 @@ def flush_queries():
pass


def process_latencies(latencies_ns):
print("Average latency: ")
print(numpy.mean(latencies_ns))
print("Median latency: ")
print(numpy.percentile(latencies_ns, 50))
print("90 percentile latency: ")
print(numpy.percentile(latencies_ns, 90))


def main(argv):
del argv
settings = mlperf_loadgen.TestSettings()
Expand All @@ -80,8 +70,7 @@ def main(argv):
settings.min_query_count = 100
settings.min_duration_ms = 10000

sut = mlperf_loadgen.ConstructSUT(
issue_query, flush_queries, process_latencies)
sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries)
qsl = mlperf_loadgen.ConstructQSL(
1024, 128, load_samples_to_ram, unload_samples_from_ram)
mlperf_loadgen.StartTest(sut, qsl, settings)
Expand Down
13 changes: 1 addition & 12 deletions loadgen/tests/perftests_null_sut.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from __future__ import print_function
from absl import app
import mlperf_loadgen
import numpy


def load_samples_to_ram(query_samples):
Expand All @@ -43,23 +42,13 @@ def flush_queries():
pass


def process_latencies(latencies_ns):
print("Average latency: ")
print(numpy.mean(latencies_ns))
print("Median latency: ")
print(numpy.percentile(latencies_ns, 50))
print("90 percentile latency: ")
print(numpy.percentile(latencies_ns, 90))


def main(argv):
del argv
settings = mlperf_loadgen.TestSettings()
settings.scenario = mlperf_loadgen.TestScenario.SingleStream
settings.mode = mlperf_loadgen.TestMode.PerformanceOnly

sut = mlperf_loadgen.ConstructSUT(
issue_query, flush_queries, process_latencies)
sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries)
qsl = mlperf_loadgen.ConstructQSL(
1024 * 1024, 1024, load_samples_to_ram, unload_samples_from_ram)
mlperf_loadgen.StartTest(sut, qsl, settings)
Expand Down
9 changes: 2 additions & 7 deletions recommendation/dlrm/pytorch/python/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def run_one_item(self, qitem):
processed_results = self.post_process(results, qitem.batch_T, self.result_dict)
if self.take_accuracy:
self.post_process.add_results(processed_results)
self.result_timing.append(time.time() - qitem.start)
self.result_timing.append(time.time() - qitem.start)
except Exception as ex: # pylint: disable=broad-except
log.error("thread: failed, %s", ex)
# since post_process will not run, fake empty responses
Expand Down Expand Up @@ -552,11 +552,6 @@ def issue_queries(query_samples):
def flush_queries():
pass

def process_latencies(latencies_ns):
# called by loadgen to show us the recorded latencies
global last_timeing
last_timeing = [t / NANO_SEC for t in latencies_ns]

settings = lg.TestSettings()
settings.FromConfig(mlperf_conf, args.model_path, args.scenario)
settings.FromConfig(user_conf, args.model_path, args.scenario)
Expand Down Expand Up @@ -588,7 +583,7 @@ def process_latencies(latencies_ns):
settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
settings.multi_stream_expected_latency_ns = int(args.max_latency * NANO_SEC)

sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
sut = lg.ConstructSUT(issue_queries, flush_queries)
qsl = lg.ConstructQSL(count, min(count, args.samples_per_query_offline), ds.load_query_samples, ds.unload_query_samples)

log.info("starting {}".format(scenario))
Expand Down
11 changes: 1 addition & 10 deletions speech_recognition/rnnt/pytorch_SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ def __init__(self, config_toml, checkpoint_path, dataset_dir,
rnnt_vocab = add_blank_label(dataset_vocab)
featurizer_config = config['input_eval']

self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
self.process_latencies)
self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries)
self.qsl = AudioQSLInMemory(dataset_dir,
manifest_filepath,
dataset_vocab,
Expand Down Expand Up @@ -111,14 +110,6 @@ def issue_queries(self, query_samples):
def flush_queries(self):
pass

def process_latencies(self, latencies_ns):
print("Average latency (ms) per query:")
print(np.mean(latencies_ns)/1000000.0)
print("Median latency (ms): ")
print(np.percentile(latencies_ns, 50)/1000000.0)
print("90 percentile latency (ms): ")
print(np.percentile(latencies_ns, 90)/1000000.0)

def __del__(self):
lg.DestroySUT(self.sut)
print("Finished destroying SUT.")
15 changes: 1 addition & 14 deletions translation/gnmt/tensorflow/generic_loadgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import threading
import time
import mlperf_loadgen
import numpy
import array

class ImplementationException (Exception):
Expand All @@ -30,18 +29,6 @@ def __repr__(self):

def flush_queries(): pass

##
# @brief Simple way to process and display latencies
# @param latencies_ns is an array of durations (in ns) it took per sample to finish
# @note that the duration is measured from query submission time to query finish time,
# hence the samples themselves could actually have been finished earlier
def process_latencies(latencies_ns):
print("Average latency (ms) per query:")
print(numpy.mean(latencies_ns)/1000000.0)
print("Median latency (ms): ")
print(numpy.percentile(latencies_ns, 50)/1000000.0)
print("90 percentile latency (ms): ")
print(numpy.percentile(latencies_ns, 90)/1000000.0)

class Task:
def __init__(self, query_id, sample_id):
Expand Down Expand Up @@ -152,7 +139,7 @@ def process(self, qitem):
total_queries = 256 # Maximum sample ID + 1
perf_queries = 8 # TBD: Doesn't seem to have an effect

sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries, process_latencies)
sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries)
qsl = mlperf_loadgen.ConstructQSL(
total_queries, perf_queries, runner.load_samples_to_ram, runner.unload_samples_from_ram)
mlperf_loadgen.StartTest(sut, qsl, settings)
Expand Down
5 changes: 1 addition & 4 deletions translation/gnmt/tensorflow/loadgen_gnmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,6 @@

NANO_SEC = 1e9

def process_latencies_gnmt(latencies_ns):
print("Please consult loadgen log (./mlperf_log_summary.txt) for performance results.")

##
# @brief Translation task that contains 1 sentence ID.
class TranslationTask:
Expand Down Expand Up @@ -525,7 +522,7 @@ def handle_tasks(self):
total_queries = runner.getTotalNumSentences() # Maximum sample ID + 1
perf_queries = min(total_queries, 3003) # Select the same subset of $perf_queries samples

sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries, process_latencies_gnmt)
sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries)
qsl = mlperf_loadgen.ConstructQSL(
total_queries, perf_queries, runner.load_samples_to_ram, runner.unload_samples_from_ram)

Expand Down
Loading

0 comments on commit fb79019

Please sign in to comment.