diff --git a/docs/module_usage/instructions/benchmark.md b/docs/module_usage/instructions/benchmark.md
index 3f117fd98..3365ecd54 100644
--- a/docs/module_usage/instructions/benchmark.md
+++ b/docs/module_usage/instructions/benchmark.md
@@ -6,7 +6,7 @@ PaddleX supports collecting model inference timing statistics. This is configured via environment variables, as follows:
 * `PADDLE_PDX_INFER_BENCHMARK_WARMUP`: number of warm-up iterations; before the test starts, random data is iterated over n times, default `0`;
 * `PADDLE_PDX_INFER_BENCHMARK_DATA_SIZE`: size of the random data, default `224`;
 * `PADDLE_PDX_INFER_BENCHMARK_ITER`: number of benchmark iterations run on random data; random data is used only when the input data is `None`;
-* `PADDLE_PDX_INFER_BENCHMARK_OUTPUT`: the `txt` file to which the benchmark metrics are saved, e.g. `./benchmark.txt`; default `None`, meaning the benchmark metrics are not saved;
+* `PADDLE_PDX_INFER_BENCHMARK_OUTPUT`: the directory to which the benchmark metrics are saved, e.g. `./benchmark`; default `None`, meaning the benchmark metrics are not saved;
 
 Usage example:
 
@@ -15,55 +15,60 @@ PADDLE_PDX_INFER_BENCHMARK=True \
 PADDLE_PDX_INFER_BENCHMARK_WARMUP=5 \
 PADDLE_PDX_INFER_BENCHMARK_DATA_SIZE=320 \
 PADDLE_PDX_INFER_BENCHMARK_ITER=10 \
-PADDLE_PDX_INFER_BENCHMARK_OUTPUT=./benchmark.txt \
+PADDLE_PDX_INFER_BENCHMARK_OUTPUT=./benchmark \
 python main.py \
     -c ./paddlex/configs/object_detection/PicoDet-XS.yaml \
     -o Global.mode=predict \
     -o Predict.model_dir=None \
+    -o Predict.batch_size=2 \
     -o Predict.input=None
 ```
 
 When benchmarking is enabled, the benchmark metrics are printed automatically:
 
 ```
-+----------------+-----------------+------+---------------+
-| Stage          | Total Time (ms) | Nums | Avg Time (ms) |
-+----------------+-----------------+------+---------------+
-| ReadCmp        | 185.48870087    | 10   | 18.54887009   |
-| Resize         | 16.95227623     | 30   | 0.56507587    |
-| Normalize      | 41.12100601     | 30   | 1.37070020    |
-| ToCHWImage     | 0.05745888      | 30   | 0.00191530    |
-| Copy2GPU       | 14.58549500     | 10   | 1.45854950    |
-| Infer          | 100.14462471    | 10   | 10.01446247   |
-| Copy2CPU       | 9.54508781      | 10   | 0.95450878    |
-| DetPostProcess | 0.56767464      | 30   | 0.01892249    |
-+----------------+-----------------+------+---------------+
-+-------------+-----------------+------+---------------+
-| Stage       | Total Time (ms) | Nums | Avg Time (ms) |
-+-------------+-----------------+------+---------------+
-| PreProcess  | 243.61944199    | 30   | 8.12064807    |
-| Inference   | 124.27520752    | 30   | 4.14250692    |
-| PostProcess | 0.56767464      | 30   | 0.01892249    |
-| End2End     | 379.70948219    | 30   | 12.65698274   |
-| WarmUp      | 9465.68179131   | 5    | 1893.13635826 |
-+-------------+-----------------+------+---------------+
++----------------+-----------------+-----------------+------------------------+
+| Component      | Total Time (ms) | Number of Calls | Avg Time Per Call (ms) |
++----------------+-----------------+-----------------+------------------------+
+| ReadCmp        | 102.39458084    | 10              | 10.23945808            |
+| Resize         | 11.20400429     | 20              | 0.56020021             |
+| Normalize      | 34.11078453     | 20              | 1.70553923             |
+| ToCHWImage     | 0.05555153      | 20              | 0.00277758             |
+| Copy2GPU       | 9.10568237      | 10              | 0.91056824             |
+| Infer          | 98.22225571     | 10              | 9.82222557             |
+| Copy2CPU       | 14.30845261     | 10              | 1.43084526             |
+| DetPostProcess | 0.45251846      | 20              | 0.02262592             |
++----------------+-----------------+-----------------+------------------------+
++-------------+-----------------+---------------------+----------------------------+
+| Stage       | Total Time (ms) | Number of Instances | Avg Time Per Instance (ms) |
++-------------+-----------------+---------------------+----------------------------+
+| PreProcess  | 147.76492119    | 20                  | 7.38824606                 |
+| Inference   | 121.63639069    | 20                  | 6.08181953                 |
+| PostProcess | 0.45251846      | 20                  | 0.02262592                 |
+| End2End     | 294.03519630    | 20                  | 14.70175982                |
+| WarmUp      | 7937.82591820   | 5                   | 1587.56518364              |
++-------------+-----------------+---------------------+----------------------------+
 ```
 
-In the benchmark results, the total time (`Total Time`, in milliseconds), **number of calls** (`Nums`), and **per-call** average time (`Avg Time`, in milliseconds) are reported for every component (`Component`) of the model, along with timing statistics broken down into warm-up (`WarmUp`), preprocessing (`PreProcess`), model inference (`Inference`), postprocessing (`PostProcess`), and end-to-end (`End2End`) stages, including each stage's total time (`Total Time`, in milliseconds), **number of samples** (`Nums`), and **per-sample** average time (`Avg Time`, in milliseconds). The metrics are also saved to the local file `./benchmark.csv`:
+In the benchmark results, the total time (`Total Time`, in milliseconds), **number of calls** (`Number of Calls`), and **per-call** average time (`Avg Time Per Call`, in milliseconds) are reported for every component (`Component`) of the model, along with timing statistics broken down into warm-up (`WarmUp`), preprocessing (`PreProcess`), model inference (`Inference`), postprocessing (`PostProcess`), and end-to-end (`End2End`) stages, including each stage's total time (`Total Time`, in milliseconds), **number of samples** (`Number of Instances`), and **per-sample** average time (`Avg Time Per Instance`, in milliseconds). These metrics are also saved locally to `./benchmark/detail.csv` and `./benchmark/summary.csv`:
 
 ```csv
-Stage,Total Time (ms),Nums,Avg Time (ms)
-ReadCmp,0.18548870086669922,10,0.018548870086669923
-Resize,0.0169522762298584,30,0.0005650758743286133
-Normalize,0.04112100601196289,30,0.001370700200398763
-ToCHWImage,5.745887756347656e-05,30,1.915295918782552e-06
-Copy2GPU,0.014585494995117188,10,0.0014585494995117188
-Infer,0.10014462471008301,10,0.0100144624710083
-Copy2CPU,0.009545087814331055,10,0.0009545087814331055
-DetPostProcess,0.0005676746368408203,30,1.892248789469401e-05
-PreProcess,0.24361944198608398,30,0.0081206480662028
-Inference,0.12427520751953125,30,0.0041425069173177086
-PostProcess,0.0005676746368408203,30,1.892248789469401e-05
-End2End,0.37970948219299316,30,0.012656982739766438
-WarmUp,9.465681791305542,5,1.8931363582611085
+Component,Total Time (ms),Number of Calls,Avg Time Per Call (ms)
+ReadCmp,0.10199093818664551,10,0.01019909381866455
+Resize,0.011309385299682617,20,0.0005654692649841309
+Normalize,0.035140275955200195,20,0.0017570137977600097
+ToCHWImage,4.744529724121094e-05,20,2.3722648620605467e-06
+Copy2GPU,0.00861215591430664,10,0.000861215591430664
+Infer,0.820899248123169,10,0.08208992481231689
+Copy2CPU,0.006002187728881836,10,0.0006002187728881836
+DetPostProcess,0.0004436969757080078,20,2.218484878540039e-05
+```
+
+```csv
+Stage,Total Time (ms),Number of Instances,Avg Time Per Instance (ms)
+PreProcess,0.14848804473876953,20,0.007424402236938477
+Inference,0.8355135917663574,20,0.04177567958831787
+PostProcess,0.0004436969757080078,20,2.218484878540039e-05
+End2End,1.0054960250854492,20,0.05027480125427246
+WarmUp,8.869974851608276,5,1.7739949703216553
 ```
diff --git a/paddlex/configs/object_detection/PicoDet-S.yaml b/paddlex/configs/object_detection/PicoDet-S.yaml
index 33f41f034..202d012e7 100644
--- a/paddlex/configs/object_detection/PicoDet-S.yaml
+++ b/paddlex/configs/object_detection/PicoDet-S.yaml
@@ -33,7 +33,7 @@ Export:
   weight_path: https://paddledet.bj.bcebos.com/models/picodet_s_320_coco_lcnet.pdparams
 
 Predict:
-  batch_size: 3
+  batch_size: 1
   model_dir: "output/best_model/inference"
   input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
   kernel_option:
diff --git a/paddlex/inference/utils/benchmark.py b/paddlex/inference/utils/benchmark.py
index 0c25c1813..a45b36287 100644
--- a/paddlex/inference/utils/benchmark.py
+++ b/paddlex/inference/utils/benchmark.py
@@ -16,6 +16,7 @@
 import functools
 from types import GeneratorType
 import time
+from pathlib import Path
 
 import numpy as np
 from prettytable import PrettyTable
@@ -116,8 +117,13 @@ def collect(self, e2e_num):
         self._e2e_elapse = time.time() - self._e2e_tic
 
         detail, summary = self.gather(e2e_num)
-        table_head = ["Stage", "Total Time (ms)", "Nums", "Avg Time (ms)"]
-        table = PrettyTable(table_head)
+        detail_head = [
+            "Component",
+            "Total Time (ms)",
+            "Number of Calls",
+            "Avg Time Per Call (ms)",
+        ]
+        table = PrettyTable(detail_head)
         table.add_rows(
             [
                 (name, f"{total * 1000:.8f}", cnts, f"{avg * 1000:.8f}")
@@ -126,7 +132,13 @@
         )
         logging.info(table)
 
-        table = PrettyTable(table_head)
+        summary_head = [
+            "Stage",
+            "Total Time (ms)",
+            "Number of Instances",
+            "Avg Time Per Instance (ms)",
+        ]
+        table = PrettyTable(summary_head)
         table.add_rows(
             [
                 (name, f"{total * 1000:.8f}", cnts, f"{avg * 1000:.8f}")
@@ -136,10 +148,17 @@
         )
         logging.info(table)
         if INFER_BENCHMARK_OUTPUT:
-            csv_data = [table_head]
-            csv_data.extend(detail)
-            csv_data.extend(summary)
-            with open("benchmark.csv", "w", newline="") as file:
+            save_dir = Path(INFER_BENCHMARK_OUTPUT)
+            save_dir.mkdir(parents=True, exist_ok=True)
+            csv_data = [detail_head, *detail]
+            # csv_data.extend(detail)
+            with open(Path(save_dir) / "detail.csv", "w", newline="") as file:
+                writer = csv.writer(file)
+                writer.writerows(csv_data)
+
+            csv_data = [summary_head, *summary]
+            # csv_data.extend(summary)
+            with open(Path(save_dir) / "summary.csv", "w", newline="") as file:
                 writer = csv.writer(file)
                 writer.writerows(csv_data)
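
A usage note on the new output format: once a run has written `./benchmark/detail.csv` and `./benchmark/summary.csv`, the files can be post-processed with the standard `csv` module. The sketch below is illustrative only and is not part of the patch; it assumes the `./benchmark` output directory from the documentation example and the column headers introduced by this change.

```python
import csv
from pathlib import Path

# Output directory from the PADDLE_PDX_INFER_BENCHMARK_OUTPUT example above.
benchmark_dir = Path("./benchmark")


def load_rows(csv_path):
    """Read one of the benchmark CSV files into a list of dicts keyed by header."""
    with open(csv_path, newline="") as f:
        return list(csv.DictReader(f))


detail = load_rows(benchmark_dir / "detail.csv")
summary = load_rows(benchmark_dir / "summary.csv")

# Share of the summed component time spent in each component.
total = sum(float(row["Total Time (ms)"]) for row in detail)
for row in detail:
    share = 100 * float(row["Total Time (ms)"]) / total
    print(f'{row["Component"]:<16} {share:6.2f}% of component time')

# Per-instance averages for each stage, as written to summary.csv.
for row in summary:
    print(f'{row["Stage"]:<12} avg per instance: {row["Avg Time Per Instance (ms)"]}')
```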
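Similarly, the documented shell invocation can be scripted from Python when the benchmark needs to be run repeatedly. This is only a thin wrapper around the command line shown in the docs above, and assumes it is executed from the PaddleX repository root.

```python
import os
import subprocess

# Same settings as the shell example in the patched docs; values are illustrative.
env = {
    **os.environ,
    "PADDLE_PDX_INFER_BENCHMARK": "True",
    "PADDLE_PDX_INFER_BENCHMARK_WARMUP": "5",
    "PADDLE_PDX_INFER_BENCHMARK_DATA_SIZE": "320",
    "PADDLE_PDX_INFER_BENCHMARK_ITER": "10",
    "PADDLE_PDX_INFER_BENCHMARK_OUTPUT": "./benchmark",
}

# Run the documented predict command with benchmarking enabled.
subprocess.run(
    [
        "python", "main.py",
        "-c", "./paddlex/configs/object_detection/PicoDet-XS.yaml",
        "-o", "Global.mode=predict",
        "-o", "Predict.model_dir=None",
        "-o", "Predict.batch_size=2",
        "-o", "Predict.input=None",
    ],
    env=env,
    check=True,
)
```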