diff --git a/README.md b/README.md
index 8d07064..38434df 100644
--- a/README.md
+++ b/README.md
@@ -37,9 +37,9 @@ Check [this URL](./yolox_ros_cpp/README.md).
 
 <!-- - yolox/image_raw : Resized image (`sensor_msgs/Image`) -->
 
-- bounding_boxes: Output BoundingBoxes like darknet_ros_msgs (`bboxes_ex_msgs/BoundingBoxes`)
-
-  ※ If you want to use `darknet_ros_msgs` , replace `bboxes_ex_msgs` with `darknet_ros_msgs`.
+- bounding_boxes (`bboxes_ex_msgs/BoundingBoxes` or `vision_msgs/Detection2DArray`)
+  - `bboxes_ex_msgs/BoundingBoxes`: Output BoundingBoxes like darknet_ros_msgs
+  - ※ If you want to use `darknet_ros_msgs` , replace `bboxes_ex_msgs` with `darknet_ros_msgs`.
 
 <!-- ![yolox_topic](images_for_readme/yolox_topic.png) -->
 
diff --git a/weights/onnx/download.bash b/weights/onnx/download.bash
index 403c6d3..3b9a60b 100755
--- a/weights/onnx/download.bash
+++ b/weights/onnx/download.bash
@@ -1,23 +1,27 @@
 #!/bin/bash
 
-# if $1 is empty
-if [ -z "$1" ]; then
-    echo "Usage: $0 <target-model>"
-    echo "Target-Models :"
-    echo "yolox_tiny, yolox_nano, yolox_s, yolox_m, yolox_l, all"
-    exit 1
-fi
-MODEL=$1
-MODEL_VERSION=0.1.1rc0
-SCRIPT_DIR=$(cd $(dirname $0); pwd)
+function download {
+    # if $1 is empty
+    if [ -z "$1" ]; then
+        echo "Usage: $0 <target-model>"
+        echo "Target-Models :"
+        echo "yolox_tiny, yolox_nano, yolox_s, yolox_m, yolox_l, all"
+        return
+    fi
+    MODEL=$1
+    MODEL_VERSION=0.1.1rc0
+    SCRIPT_DIR=$(cd $(dirname ${BASH_SOURCE:-$0}); pwd)
 
-echo $MODEL
-if [ "$MODEL" = "all" ]; then
-    wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_tiny.onnx -P $SCRIPT_DIR
-    wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_nano.onnx -P $SCRIPT_DIR
-    wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_s.onnx -P $SCRIPT_DIR
-    wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_m.onnx -P $SCRIPT_DIR
-    wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_l.onnx -P $SCRIPT_DIR
-else
-    wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/$MODEL.onnx -P $SCRIPT_DIR
-fi
+    echo $MODEL
+    if [ "$MODEL" = "all" ]; then
+        wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_tiny.onnx -P $SCRIPT_DIR
+        wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_nano.onnx -P $SCRIPT_DIR
+        wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_s.onnx -P $SCRIPT_DIR
+        wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_m.onnx -P $SCRIPT_DIR
+        wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/yolox_l.onnx -P $SCRIPT_DIR
+    else
+        wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/$MODEL_VERSION/$MODEL.onnx -P $SCRIPT_DIR
+    fi
+}
+
+download $1
diff --git a/weights/tensorrt/convert.bash b/weights/tensorrt/convert.bash
index 8331961..c90c736 100755
--- a/weights/tensorrt/convert.bash
+++ b/weights/tensorrt/convert.bash
@@ -1,30 +1,34 @@
 #!/bin/bash
 
-# if $1 is empty
-if [ -z "$1" ]; then
-    echo "Usage: $0 <target-model>"
-    echo "Target-Models : yolox_tiny, yolox_nano, yolox_s, yolox_m, yolox_l"
-    exit 1
-fi
+function convert {
+    # if $1 is empty
+    if [ -z "$1" ]; then
+        echo "Usage: $0 <target-model>"
+        echo "Target-Models : yolox_tiny, yolox_nano, yolox_s, yolox_m, yolox_l"
+        return
+    fi
 
-MODEL=$1
-SCRIPT_DIR=$(cd $(dirname $0); pwd)
+    MODEL=$1
+    SCRIPT_DIR=$(cd $(dirname ${BASH_SOURCE:-$0}); pwd)
 
-echo "Model Name: ${MODEL}"
-echo ""
+    echo "Model Name: ${MODEL}"
+    echo ""
 
-ONNX_MODEL_PATH=$SCRIPT_DIR/../onnx/$MODEL.onnx
-if [ ! -e $ONNX_MODEL_PATH ]; then
-    $SCRIPT_DIR/../onnx/download.bash $MODEL
-fi
+    ONNX_MODEL_PATH=$SCRIPT_DIR/../onnx/$MODEL.onnx
+    if [ ! -e $ONNX_MODEL_PATH ]; then
+        $SCRIPT_DIR/../onnx/download.bash $MODEL
+    fi
 
-if [ ! -e $ONNX_MODEL_PATH ]; then
-    echo "[ERROR] Not Found ${ONNX_MODEL_PATH}"
-    echo "[ERROR] Please check target model name."
-    exit 1
-fi
+    if [ ! -e $ONNX_MODEL_PATH ]; then
+        echo "[ERROR] Not Found ${ONNX_MODEL_PATH}"
+        echo "[ERROR] Please check target model name."
+        return
+    fi
 
-/usr/src/tensorrt/bin/trtexec \
-    --onnx=$SCRIPT_DIR/../onnx/$MODEL.onnx \
-    --saveEngine=$SCRIPT_DIR/$MODEL.trt \
-    --fp16 --verbose
+    /usr/src/tensorrt/bin/trtexec \
+        --onnx=$SCRIPT_DIR/../onnx/$MODEL.onnx \
+        --saveEngine=$SCRIPT_DIR/$MODEL.trt \
+        --fp16 --verbose
+}
+
+convert $1
diff --git a/yolox_ros_cpp/README.md b/yolox_ros_cpp/README.md
index 8e06517..3e0b73f 100644
--- a/yolox_ros_cpp/README.md
+++ b/yolox_ros_cpp/README.md
@@ -3,6 +3,8 @@
 ## Requirements
 - ROS2 Iron
   - ros-iron-generate-parameter-library
+  - ros-iron-vision-msgs
+  - ros-iron-usb-cam
 - OpenCV 4.x
 - OpenVINO 2024.*
 - TensorRT 10.x *
@@ -30,8 +32,6 @@ git clone --recursive https://github.com/Ar-Ray-code/YOLOX-ROS -b iron
 cd ~/ros2_ws
 
 ./src/YOLOX-ROS/weights/onnx/download.bash yolox_tiny
-# Download onnx file and convert to IR format.
-# ./src/YOLOX-ROS/weights/openvino/download.bash yolox_tiny
 ```
 
 ### TensorRT
@@ -39,11 +39,11 @@ cd ~/ros2_ws
 cd ~/ros2_ws
 
 # Download onnx model and convert to TensorRT engine.
-# 1st arg is model name. 2nd is workspace size.
+# argument is model name. set yolox_tiny, yolox_nano, yolox_s, yolox_m or yolox_l.
 ./src/YOLOX-ROS/weights/tensorrt/convert.bash yolox_tiny
 ```
 
-#### Tensorflow Lite
+### Tensorflow Lite
 ```bash
 cd ~/ros2_ws
 
@@ -51,7 +51,7 @@ cd ~/ros2_ws
 ./src/YOLOX-ROS/weights/tflite/download_model.bash
 ```
 
-#### PINTO_model_zoo
+### PINTO_model_zoo
 - Support PINTO_model_zoo model
 - Download model using the following script.
   - https://github.com/PINTO0309/PINTO_model_zoo/blob/main/132_YOLOX/download_nano.sh
@@ -77,15 +77,15 @@ cd ~/ros2_ws
 # build with openvino
 source /opt/ros/humble/setup.bash
 source /opt/intel/openvino_2021/bin/setupvars.sh
-colcon build --cmake-args -DYOLOX_USE_OPENVINO=ON
+colcon build --symlink-install --cmake-args -DYOLOX_USE_OPENVINO=ON
 ```
 
 ### TensorRT
 
 ```bash
 # build with tensorrt
-source /opt/ros/humble/setup.bash
-colcon build --cmake-args -DYOLOX_USE_TENSORRT=ON
+source /opt/ros/iron/setup.bash
+colcon build --symlink-install --cmake-args -DYOLOX_USE_TENSORRT=ON
 ```
 
 ### TFLite
@@ -115,12 +115,13 @@ make -j"$(nproc)"
 ```
 
 ```bash
-colcon build --cmake-args \
-  -DYOLOX_USE_TFLITE=ON \
-  -DTFLITE_LIB_PATH=${WORKSPACE}/tflite_build \
-  -DTFLITE_INCLUDE_DIR=${WORKSPACE}/tensorflow_src/ \
-  -DABSEIL_CPP_ICLUDE_DIR=${WORKSPACE}/tflite_build/abseil-cpp \
-  -DFLATBUFFERS_INCLUDE_DIR=${WORKSPACE}/tflite_build/flatbuffers/include
+colcon build --symlink-install \
+   --cmake-args \
+    -DYOLOX_USE_TFLITE=ON \
+    -DTFLITE_LIB_PATH=${WORKSPACE}/tflite_build \
+    -DTFLITE_INCLUDE_DIR=${WORKSPACE}/tensorflow_src/ \
+    -DABSEIL_CPP_ICLUDE_DIR=${WORKSPACE}/tflite_build/abseil-cpp \
+    -DFLATBUFFERS_INCLUDE_DIR=${WORKSPACE}/tflite_build/flatbuffers/include
 ```
 
 <br>
@@ -139,10 +140,6 @@ ros2 launch yolox_ros_cpp yolox_openvino.launch.py
 # ros2 launch yolox_ros_cpp yolox_openvino.launch.py \
 #     model_path:=install/yolox_ros_cpp/share/yolox_ros_cpp/weights/onnx/yolox_tiny_480x640.onnx \
 #     model_version:="0.1.0"
-
-## run YOLOX-tiny with NCS2
-# ros2 launch yolox_ros_cpp yolox_openvino_ncs2.launch.py
-
 ```
 
 ### TensorRT
@@ -156,20 +153,20 @@ ros2 launch yolox_ros_cpp yolox_tensorrt.launch.py
 
 ```
 
-### Jetson + TensorRT
+<!-- ### Jetson + TensorRT
 Jetson docker container cannot display GUI.
 If you want to show image with bounding box drawn, subscribe from host jetson or other PC.
 
 ```bash
 # run yolox_tiny
 ros2 launch yolox_ros_cpp yolox_tensorrt_jetson.launch.py
-```
+``` -->
 
-<!-- ### ONNXRuntime
+### ONNXRuntime
 ```bash
 # run yolox_tiny
 ros2 launch yolox_ros_cpp yolox_onnxruntime.launch.py
-``` -->
+```
 
 ### Tensorflow Lite
 ```bash
@@ -188,19 +185,21 @@ ros2 launch yolox_ros_cpp yolox_tflite.launch.py
 <details>
 <summary>OpenVINO example</summary>
 
-- `model_path`: ./install/yolox_ros_cpp/share/yolox_ros_cpp/weights/openvino/yolox_tiny.xml
+- `model_path`: ./src/YOLOX-ROS/weights/onnx/yolox_tiny.onnx
 - `p6`: false
 - `class_labels_path`: ""
   - if not set, use coco_names.
   - See [here](https://github.com/fateshelled/YOLOX-ROS/blob/dev_cpp/yolox_ros_cpp/yolox_ros_cpp/labels/coco_names.txt) for label format.
 - `num_classes`: 80
 - `model_version`: 0.1.1rc0
-- `openvino/device`: AUTO
+- `openvino_device`: AUTO
 - `nms`: 0.45
 - `imshow_isshow`: true
 - `src_image_topic_name`: /image_raw
 - `publish_image_topic_name`: /yolox/image_raw
 - `publish_boundingbox_topic_name`: /yolox/bounding_boxes
+- `use_bbox_ex_msgs`: false
+- `publish_resized_image`: false
 
 </details>
 
@@ -208,18 +207,20 @@ ros2 launch yolox_ros_cpp yolox_tflite.launch.py
 <details>
 <summary>TensorRT example</summary>
 
-- `model_path`: ./install/yolox_ros_cpp/share/yolox_ros_cpp/weights/tensorrt/yolox_tiny.trt
+- `model_path`: ../src/YOLOX-ROS/weights/tensorrt/yolox_tiny.trt
 - `p6`: false
 - `class_labels_path`: ""
 - `num_classes`: 80
 - `model_version`: 0.1.1rc0
-- `tensorrt/device`: 0
+- `tensorrt_device`: 0
 - `conf`: 0.3
 - `nms`: 0.45
 - `imshow_isshow`: true
 - `src_image_topic_name`: /image_raw
 - `publish_image_topic_name`: /yolox/image_raw
 - `publish_boundingbox_topic_name`: /yolox/bounding_boxes
+- `use_bbox_ex_msgs`: false
+- `publish_resized_image`: false
 
 </details>
 
@@ -227,17 +228,17 @@ ros2 launch yolox_ros_cpp yolox_tflite.launch.py
 <summary>ONNXRuntime example</summary>
 
 
-- `model_path`: ./install/yolox_ros_cpp/share/yolox_ros_cpp/weights/onnx/yolox_tiny.onnx
+- `model_path`: ./src/YOLOX-ROS/weights/onnx/yolox_tiny.onnx
 - `p6`: false
 - `class_labels_path`: ""
 - `num_classes`: 80
 - `model_version`: 0.1.1rc0
-- `onnxruntime/use_cuda`: true
-- `onnxruntime/use_parallel`: false
-- `onnxruntime/device_id`: 0
-- `onnxruntime/inter_op_num_threads`: 1
-  - if `onnxruntime/use_parallel` is true, the number of threads used to parallelize the execution of the graph (across nodes).
-- `onnxruntime/intra_op_num_threads`: 1
+- `onnxruntime_use_cuda`: true
+- `onnxruntime_use_parallel`: false
+- `onnxruntime_device_id`: 0
+- `onnxruntime_inter_op_num_threads`: 1
+  - if `onnxruntime_use_parallel` is true, the number of threads used to parallelize the execution of the graph (across nodes).
+- `onnxruntime_intra_op_num_threads`: 1
   - the number of threads to use to run the model
 - `conf`: 0.3
 - `nms`: 0.45
@@ -245,25 +246,29 @@ ros2 launch yolox_ros_cpp yolox_tflite.launch.py
 - `src_image_topic_name`: /image_raw
 - `publish_image_topic_name`: /yolox/image_raw
 - `publish_boundingbox_topic_name`: /yolox/bounding_boxes
+- `use_bbox_ex_msgs`: false
+- `publish_resized_image`: false
 
 </details>
 
 <details>
 <summary>Tensorflow Lite example</summary>
 
-- `model_path`: ./install/yolox_ros_cpp/share/yolox_ros_cpp/weights/tflite/model.tflite
+- `model_path`: ./src/YOLOX-ROS/weights/tflite/model.tflite
 - `p6`: false
 - `is_nchw`: true
 - `class_labels_path`: ""
 - `num_classes`: 1
 - `model_version`: 0.1.1rc0
-- `tflite/num_threads`: 1
+- `tflite_num_threads`: 1
 - `conf`: 0.3
 - `nms`: 0.45
 - `imshow_isshow`: true
 - `src_image_topic_name`: /image_raw
 - `publish_image_topic_name`: /yolox/image_raw
 - `publish_boundingbox_topic_name`: /yolox/bounding_boxes
+- `use_bbox_ex_msgs`: false
+- `publish_resized_image`: false
 
 </details>
 
diff --git a/yolox_ros_cpp/yolox_cpp/CMakeLists.txt b/yolox_ros_cpp/yolox_cpp/CMakeLists.txt
index a4d9b57..db849f4 100644
--- a/yolox_ros_cpp/yolox_cpp/CMakeLists.txt
+++ b/yolox_ros_cpp/yolox_cpp/CMakeLists.txt
@@ -39,9 +39,9 @@ if(YOLOX_USE_OPENVINO)
   find_package(OpenVINO REQUIRED)
 
   set(ENABLE_OPENVINO ON)
-  set(TARGET_SRC src/yolox_openvino.cpp)
-  set(TARGET_LIBS openvino::runtime)
-  set(TARGET_DPENDENCIES OpenVINO)
+  set(TARGET_SRC ${TARGET_SRC} src/yolox_openvino.cpp)
+  set(TARGET_LIBS ${TARGET_LIBS} openvino::runtime)
+  set(TARGET_DPENDENCIES ${TARGET_DPENDENCIES} OpenVINO)
 endif()
 
 if(YOLOX_USE_TENSORRT)
@@ -54,28 +54,29 @@ if(YOLOX_USE_TENSORRT)
     PATH_SUFFIXES include)
 
   set(ENABLE_TENSORRT ON)
-  set(TARGET_SRC src/yolox_tensorrt.cpp)
-  set(TARGET_LIBS nvinfer nvinfer_plugin)
-  set(TARGET_DPENDENCIES CUDA)
+  set(TARGET_SRC ${TARGET_SRC} src/yolox_tensorrt.cpp)
+  set(TARGET_LIBS ${TARGET_LIBS} nvinfer nvinfer_plugin)
+  set(TARGET_DPENDENCIES ${TARGET_DPENDENCIES} CUDA)
 endif()
 
 # tflite
 if(YOLOX_USE_TFLITE)
   set(ENABLE_TFLITE ON)
-  set(TARGET_SRC src/yolox_tflite.cpp)
-  set(INCLUDES ${INCLUDES} ${TFLITE_INCLUDE_DIR})
-  set(INCLUDES ${INCLUDES} ${ABSEIL_CPP_ICLUDE_DIR})
-  set(INCLUDES ${INCLUDES} ${FLATBUFFERS_INCLUDE_DIR})
-  set(TARGET_LIBS ${TFLITE_LIB_PATH}/libtensorflow-lite.so)
-  target_include_directories(yolox_cpp PUBLIC ${INCLUDES})
+  set(TARGET_SRC ${TARGET_SRC} src/yolox_tflite.cpp)
+  set(TFLITE_INCLUDES
+    ${TFLITE_INCLUDE_DIR}
+    ${ABSEIL_CPP_ICLUDE_DIR}
+    ${FLATBUFFERS_INCLUDE_DIR}
+  )
+  set(TARGET_LIBS ${TARGET_LIBS} ${TFLITE_LIB_PATH}/libtensorflow-lite.so)
 endif()
 
 # onnxruntime
 if(YOLOX_USE_ONNXRUNTIME)
   find_library(ONNXRUNTIME NAMES onnxruntime)
   set(ENABLE_ONNXRUNTIME ON)
-  set(TARGET_SRC src/yolox_onnxruntime.cpp)
-  set(TARGET_LIBS onnxruntime)
+  set(TARGET_SRC ${TARGET_SRC} src/yolox_onnxruntime.cpp)
+  set(TARGET_LIBS ${TARGET_LIBS} onnxruntime)
 endif()
 
 configure_file(
@@ -89,7 +90,8 @@ ament_export_dependencies(${TARGET_DPENDENCIES})
 target_link_libraries(yolox_cpp ${TARGET_LIBS})
 
 if (YOLOX_USE_TFLITE)
-  ament_export_include_directories(${INCLUDES})
+  target_include_directories(yolox_cpp PUBLIC ${TFLITE_INCLUDES})
+  ament_export_include_directories(${TFLITE_INCLUDES})
   install(DIRECTORY ${TFLITE_LIB_PATH}/ DESTINATION lib)
 endif()
 
diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/.gitignore b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/.gitignore
index 299bb98..0e56cf2 100644
--- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/.gitignore
+++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/.gitignore
@@ -1 +1 @@
-config.h
\ No newline at end of file
+config.h
diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/coco_names.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/coco_names.hpp
index 61d2f03..acf1b3a 100644
--- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/coco_names.hpp
+++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/coco_names.hpp
@@ -96,4 +96,4 @@ namespace yolox_cpp{
         {0.500, 0.500, 0.000}
     };
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp
index fb68649..5360908 100644
--- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp
+++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp
@@ -32,9 +32,9 @@ namespace yolox_cpp
     {
     public:
         AbcYoloX() {}
-        AbcYoloX(const float nms_th = 0.45, const float conf_th = 0.3,
+        AbcYoloX(float nms_th = 0.45, float conf_th = 0.3,
                  const std::string &model_version = "0.1.1rc0",
-                 const int num_classes = 80, const bool p6 = false)
+                 int num_classes = 80, bool p6 = false)
             : nms_thresh_(nms_th), bbox_conf_thresh_(conf_th),
               num_classes_(num_classes), p6_(p6), model_version_(model_version)
         {
@@ -49,16 +49,21 @@ namespace yolox_cpp
         int num_classes_;
         bool p6_;
         std::string model_version_;
-        const std::vector<float> mean_ = {0.485, 0.456, 0.406};
-        const std::vector<float> std_ = {0.229, 0.224, 0.225};
+        // const std::vector<float> mean_ = {0.485, 0.456, 0.406};
+        // const std::vector<float> std_ = {0.229, 0.224, 0.225};
+        const std::vector<float> std255_inv_ = {
+            1.0 / (255.0 * 0.229), 1.0 / (255.0 * 0.224), 1.0 / (255.0 * 0.225)};
+        const std::vector<float> mean_std_ = {
+            -0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225};
         const std::vector<int> strides_ = {8, 16, 32};
         const std::vector<int> strides_p6_ = {8, 16, 32, 64};
         std::vector<GridAndStride> grid_strides_;
 
         cv::Mat static_resize(const cv::Mat &img)
         {
-            const float r = std::min(input_w_ / (img.cols * 1.0), input_h_ / (img.rows * 1.0));
-            // r = std::min(r, 1.0f);
+            const float r = std::min(
+                static_cast<float>(input_w_) / static_cast<float>(img.cols),
+                static_cast<float>(input_h_) / static_cast<float>(img.rows));
             const int unpad_w = r * img.cols;
             const int unpad_h = r * img.rows;
             cv::Mat re(unpad_h, unpad_w, CV_8UC3);
@@ -74,31 +79,30 @@ namespace yolox_cpp
             const size_t channels = 3;
             const size_t img_h = img.rows;
             const size_t img_w = img.cols;
+            const size_t img_hw = img_h * img_w;
+            float *blob_data_ch0 = blob_data;
+            float *blob_data_ch1 = blob_data + img_hw;
+            float *blob_data_ch2 = blob_data + img_hw * 2;
+            // HWC -> CHW
             if (this->model_version_ == "0.1.0")
             {
-                for (size_t c = 0; c < channels; ++c)
+                for (size_t i = 0; i < img_hw; ++i)
                 {
-                    for (size_t h = 0; h < img_h; ++h)
-                    {
-                        for (size_t w = 0; w < img_w; ++w)
-                        {
-                            blob_data[c * img_w * img_h + h * img_w + w] =
-                                ((float)img.ptr<cv::Vec3b>(h)[w][c] / 255.0 - this->mean_[c]) / this->std_[c];
-                        }
-                    }
+                    // blob = (img / 255.0 - mean) / std
+                    const size_t src_idx = i * channels;
+                    blob_data_ch0[i] = static_cast<float>(img.data[src_idx + 0]) * this->std255_inv_[0] + this->mean_std_[0];
+                    blob_data_ch1[i] = static_cast<float>(img.data[src_idx + 1]) * this->std255_inv_[1] + this->mean_std_[1];
+                    blob_data_ch2[i] = static_cast<float>(img.data[src_idx + 2]) * this->std255_inv_[2] + this->mean_std_[2];
                 }
             }
             else
             {
-                for (size_t c = 0; c < channels; ++c)
+                for (size_t i = 0; i < img_hw; ++i)
                 {
-                    for (size_t h = 0; h < img_h; ++h)
-                    {
-                        for (size_t w = 0; w < img_w; ++w)
-                        {
-                            blob_data[c * img_w * img_h + h * img_w + w] = (float)img.ptr<cv::Vec3b>(h)[w][c]; // 0.1.1rc0 or later
-                        }
-                    }
+                    const size_t src_idx = i * channels;
+                    blob_data_ch0[i] = static_cast<float>(img.data[src_idx + 0]);
+                    blob_data_ch1[i] = static_cast<float>(img.data[src_idx + 1]);
+                    blob_data_ch2[i] = static_cast<float>(img.data[src_idx + 2]);
                 }
             }
         }
@@ -107,33 +111,25 @@ namespace yolox_cpp
         void blobFromImage_nhwc(const cv::Mat &img, float *blob_data)
         {
             const size_t channels = 3;
-            const size_t img_h = img.rows;
-            const size_t img_w = img.cols;
+            cv::Mat img_f32;
+            img.convertTo(img_f32, CV_32FC3);
             if (this->model_version_ == "0.1.0")
             {
-                for (size_t i = 0; i < img_h * img_w; ++i)
-                {
-                    for (size_t c = 0; c < channels; ++c)
-                    {
-                        blob_data[i * channels + c] =
-                            ((float)img.data[i * channels + c] / 255.0 - this->mean_[c]) / this->std_[c];
-                    }
-                }
-            }
-            else
-            {
-                for (size_t i = 0; i < img_h * img_w; ++i)
+                std::vector<cv::Mat> img_f32_split(3);
+                cv::split(img_f32, img_f32_split);
+                for (size_t i = 0; i < channels; ++i)
                 {
-                    for (size_t c = 0; c < channels; ++c)
-                    {
-                        blob_data[i * channels + c] = (float)img.data[i * channels + c]; // 0.1.1rc0 or later
-                    }
+                    img_f32_split[i] *= this->std255_inv_[i];
+                    img_f32_split[i] += this->mean_std_[i];
                 }
+                cv::merge(img_f32_split, img_f32);
             }
+            memcpy(blob_data, img_f32.data, img.rows * img.cols * channels * sizeof(float));
         }
 
         void generate_grids_and_stride(const int target_w, const int target_h, const std::vector<int> &strides, std::vector<GridAndStride> &grid_strides)
         {
+            grid_strides.clear();
             for (auto stride : strides)
             {
                 const int num_grid_w = target_w / stride;
@@ -151,6 +147,7 @@ namespace yolox_cpp
         void generate_yolox_proposals(const std::vector<GridAndStride> &grid_strides, const float *feat_ptr, const float prob_threshold, std::vector<Object> &objects)
         {
             const int num_anchors = grid_strides.size();
+            objects.clear();
 
             for (int anchor_idx = 0; anchor_idx < num_anchors; ++anchor_idx)
             {
@@ -160,18 +157,15 @@ namespace yolox_cpp
 
                 const int basic_pos = anchor_idx * (num_classes_ + 5);
 
-                const float box_objectness = feat_ptr[basic_pos + 4];
                 int class_id = 0;
-                float max_class_score = 0.0;
-                for (int class_idx = 0; class_idx < num_classes_; ++class_idx)
+                float max_class_score = 0.0f;
                 {
-                    const float box_cls_score = feat_ptr[basic_pos + 5 + class_idx];
-                    const float box_prob = box_objectness * box_cls_score;
-                    if (box_prob > max_class_score)
-                    {
-                        class_id = class_idx;
-                        max_class_score = box_prob;
-                    }
+                    const float box_objectness = feat_ptr[basic_pos + 4];
+                    auto begin = feat_ptr + (basic_pos + 5);
+                    auto end = feat_ptr + (basic_pos + 5 + num_classes_);
+                    auto max_elem = std::max_element(begin, end);
+                    class_id = max_elem - begin;
+                    max_class_score = (*max_elem) * box_objectness;
                 }
                 if (max_class_score > prob_threshold)
                 {
@@ -203,42 +197,6 @@ namespace yolox_cpp
             return inter.area();
         }
 
-        void qsort_descent_inplace(std::vector<Object> &faceobjects, int left, int right)
-        {
-            int i = left;
-            int j = right;
-            float p = faceobjects[(left + right) / 2].prob;
-
-            while (i <= j)
-            {
-                while (faceobjects[i].prob > p)
-                    ++i;
-
-                while (faceobjects[j].prob < p)
-                    --j;
-
-                if (i <= j)
-                {
-                    std::swap(faceobjects[i], faceobjects[j]);
-
-                    ++i;
-                    --j;
-                }
-            }
-            if (left < j)
-                qsort_descent_inplace(faceobjects, left, j);
-            if (i < right)
-                qsort_descent_inplace(faceobjects, i, right);
-        }
-
-        void qsort_descent_inplace(std::vector<Object> &objects)
-        {
-            if (objects.empty())
-                return;
-
-            qsort_descent_inplace(objects, 0, objects.size() - 1);
-        }
-
         void nms_sorted_bboxes(const std::vector<Object> &faceobjects, std::vector<int> &picked, const float nms_threshold)
         {
             picked.clear();
@@ -256,7 +214,7 @@ namespace yolox_cpp
                 const Object &a = faceobjects[i];
                 const int picked_size = picked.size();
 
-                int keep = 1;
+                bool keep = true;
                 for (int j = 0; j < picked_size; ++j)
                 {
                     const Object &b = faceobjects[picked[j]];
@@ -266,7 +224,10 @@ namespace yolox_cpp
                     const float union_area = areas[i] + areas[picked[j]] - inter_area;
                     // float IoU = inter_area / union_area
                     if (inter_area / union_area > nms_threshold)
-                        keep = 0;
+                    {
+                        keep = false;
+                        break;
+                    }
                 }
 
                 if (keep)
@@ -282,29 +243,36 @@ namespace yolox_cpp
             std::vector<Object> proposals;
             generate_yolox_proposals(grid_strides, prob, bbox_conf_thresh, proposals);
 
-            qsort_descent_inplace(proposals);
+            std::sort(
+                proposals.begin(), proposals.end(),
+                [](const Object &a, const Object &b)
+                {
+                    return a.prob > b.prob; // descent
+                });
 
             std::vector<int> picked;
             nms_sorted_bboxes(proposals, picked, nms_thresh_);
 
             int count = picked.size();
             objects.resize(count);
+            const float max_x = static_cast<float>(img_w - 1);
+            const float max_y = static_cast<float>(img_h - 1);
 
             for (int i = 0; i < count; ++i)
             {
                 objects[i] = proposals[picked[i]];
 
                 // adjust offset to original unpadded
-                float x0 = (objects[i].rect.x) / scale;
-                float y0 = (objects[i].rect.y) / scale;
+                float x0 = objects[i].rect.x / scale;
+                float y0 = objects[i].rect.y / scale;
                 float x1 = (objects[i].rect.x + objects[i].rect.width) / scale;
                 float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;
 
                 // clip
-                x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
-                y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
-                x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
-                y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
+                x0 = std::max(std::min(x0, max_x), 0.f);
+                y0 = std::max(std::min(y0, max_y), 0.f);
+                x1 = std::max(std::min(x1, max_x), 0.f);
+                y1 = std::max(std::min(y1, max_y), 0.f);
 
                 objects[i].rect.x = x0;
                 objects[i].rect.y = y0;
diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox.hpp
index faad7cc..7a2a16c 100644
--- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox.hpp
+++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox.hpp
@@ -19,5 +19,4 @@
     #include "yolox_tflite.hpp"
 #endif
 
-
 #endif
diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_onnxruntime.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_onnxruntime.hpp
index 75e6fd0..413356a 100644
--- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_onnxruntime.hpp
+++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_onnxruntime.hpp
@@ -8,7 +8,7 @@
 #include <iostream>
 #include <opencv2/opencv.hpp>
 
-#include "onnxruntime/core/session/onnxruntime_cxx_api.h"
+#include <onnxruntime/onnxruntime_cxx_api.h>
 
 #include "core.hpp"
 #include "coco_names.hpp"
@@ -16,18 +16,18 @@
 namespace yolox_cpp{
     class YoloXONNXRuntime: public AbcYoloX{
         public:
-            YoloXONNXRuntime(file_name_t path_to_model,
+            YoloXONNXRuntime(const file_name_t &path_to_model,
                              int intra_op_num_threads, int inter_op_num_threads=1,
                              bool use_cuda=true, int device_id=0, bool use_parallel=false,
-                             float nms_th=0.45, float conf_th=0.3, std::string model_version="0.1.1rc0",
+                             float nms_th=0.45, float conf_th=0.3, const std::string &model_version="0.1.1rc0",
                              int num_classes=80, bool p6=false);
             std::vector<Object> inference(const cv::Mat& frame) override;
 
         private:
             int intra_op_num_threads_ = 1;
             int inter_op_num_threads_ = 1;
-            int device_id_ = 0;
             bool use_cuda_ = true;
+            int device_id_ = 0;
             bool use_parallel_ = false;
 
             Ort::Session session_{nullptr};
diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_openvino.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_openvino.hpp
index d3ba1de..9c99c91 100644
--- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_openvino.hpp
+++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_openvino.hpp
@@ -15,8 +15,8 @@
 namespace yolox_cpp{
     class YoloXOpenVINO: public AbcYoloX{
         public:
-            YoloXOpenVINO(file_name_t path_to_model, std::string device_name,
-                          float nms_th=0.45, float conf_th=0.3, std::string model_version="0.1.1rc0",
+            YoloXOpenVINO(const file_name_t &path_to_model, std::string device_name,
+                          float nms_th=0.45, float conf_th=0.3, const std::string &model_version="0.1.1rc0",
                           int num_classes=80, bool p6=false);
             std::vector<Object> inference(const cv::Mat& frame) override;
 
@@ -28,4 +28,4 @@ namespace yolox_cpp{
     };
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp
index c61b0ae..15ee452 100644
--- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp
+++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp
@@ -9,8 +9,8 @@
 #include <fstream>
 #include <opencv2/opencv.hpp>
 
-#include "cuda_runtime_api.h"
-#include "NvInfer.h"
+#include <cuda_runtime_api.h>
+#include <NvInfer.h>
 
 #include "core.hpp"
 #include "coco_names.hpp"
@@ -25,7 +25,7 @@ namespace yolox_cpp{
             auto ret = (status);\
             if (ret != 0)\
             {\
-                std::cerr << "Cuda failure: " << ret << std::endl;\
+                std::cerr << "CUDA Failure: " << ret << std::endl;\
                 abort();\
             }\
         } while (0)
@@ -33,14 +33,14 @@ namespace yolox_cpp{
 
     class YoloXTensorRT: public AbcYoloX{
         public:
-            YoloXTensorRT(file_name_t path_to_engine, int device=0,
-                          float nms_th=0.45, float conf_th=0.3, std::string model_version="0.1.1rc0",
+            YoloXTensorRT(const file_name_t &path_to_engine, int device=0,
+                          float nms_th=0.45, float conf_th=0.3, const std::string &model_version="0.1.1rc0",
                           int num_classes=80, bool p6=false);
             ~YoloXTensorRT();
             std::vector<Object> inference(const cv::Mat& frame) override;
 
         private:
-            void doInference(float* input, float* output);
+            void doInference(const float* input, float* output);
 
             int DEVICE_ = 0;
             Logger gLogger_;
@@ -51,8 +51,10 @@ namespace yolox_cpp{
             const int inputIndex_ = 0;
             const int outputIndex_ = 1;
             void *inference_buffers_[2];
+            std::vector<float> input_blob_;
+            std::vector<float> output_blob_;
 
     };
 } // namespace yolox_cpp
 
-#endif
\ No newline at end of file
+#endif
diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tflite.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tflite.hpp
index ff216be..ffeea32 100644
--- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tflite.hpp
+++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tflite.hpp
@@ -30,8 +30,8 @@ namespace yolox_cpp{
 
     class YoloXTflite: public AbcYoloX{
         public:
-            YoloXTflite(file_name_t path_to_model, int num_threads,
-                        float nms_th=0.45, float conf_th=0.3, std::string model_version="0.1.1rc0",
+            YoloXTflite(const file_name_t &path_to_model, int num_threads,
+                        float nms_th=0.45, float conf_th=0.3, const std::string &model_version="0.1.1rc0",
                         int num_classes=80, bool p6=false, bool is_nchw=true);
             ~YoloXTflite();
             std::vector<Object> inference(const cv::Mat& frame) override;
@@ -50,4 +50,4 @@ namespace yolox_cpp{
     };
 } // namespace yolox_cpp
 
-#endif
\ No newline at end of file
+#endif
diff --git a/yolox_ros_cpp/yolox_cpp/src/yolox_onnxruntime.cpp b/yolox_ros_cpp/yolox_cpp/src/yolox_onnxruntime.cpp
index 3545584..d3f97a0 100644
--- a/yolox_ros_cpp/yolox_cpp/src/yolox_onnxruntime.cpp
+++ b/yolox_ros_cpp/yolox_cpp/src/yolox_onnxruntime.cpp
@@ -2,10 +2,10 @@
 
 namespace yolox_cpp{
 
-    YoloXONNXRuntime::YoloXONNXRuntime(file_name_t path_to_model,
+    YoloXONNXRuntime::YoloXONNXRuntime(const file_name_t &path_to_model,
                                        int intra_op_num_threads, int inter_op_num_threads,
                                        bool use_cuda, int device_id, bool use_parallel,
-                                       float nms_th, float conf_th, std::string model_version,
+                                       float nms_th, float conf_th, const std::string &model_version,
                                        int num_classes, bool p6)
     :AbcYoloX(nms_th, conf_th, model_version, num_classes, p6),
      intra_op_num_threads_(intra_op_num_threads), inter_op_num_threads_(inter_op_num_threads),
@@ -137,8 +137,11 @@ namespace yolox_cpp{
 
         float* net_pred = (float *)this->output_buffer_[0].get();
 
-        // post process
-        float scale = std::min(input_w_ / (frame.cols*1.0), input_h_ / (frame.rows*1.0));
+        // postprocess
+        const float scale = std::min(
+            static_cast<float>(this->input_w_) / static_cast<float>(frame.cols),
+            static_cast<float>(this->input_h_) / static_cast<float>(frame.rows)
+        );
         std::vector<Object> objects;
         decode_outputs(net_pred, this->grid_strides_, objects, this->bbox_conf_thresh_, scale, frame.cols, frame.rows);
         return objects;
diff --git a/yolox_ros_cpp/yolox_cpp/src/yolox_openvino.cpp b/yolox_ros_cpp/yolox_cpp/src/yolox_openvino.cpp
index d0a63aa..a6c9df1 100644
--- a/yolox_ros_cpp/yolox_cpp/src/yolox_openvino.cpp
+++ b/yolox_ros_cpp/yolox_cpp/src/yolox_openvino.cpp
@@ -1,8 +1,8 @@
 #include "yolox_cpp/yolox_openvino.hpp"
 
 namespace yolox_cpp{
-    YoloXOpenVINO::YoloXOpenVINO(file_name_t path_to_model, std::string device_name,
-                                 float nms_th, float conf_th, std::string model_version,
+    YoloXOpenVINO::YoloXOpenVINO(const file_name_t &path_to_model, std::string device_name,
+                                 float nms_th, float conf_th, const std::string &model_version,
                                  int num_classes, bool p6)
     :AbcYoloX(nms_th, conf_th, model_version, num_classes, p6),
      device_name_(device_name)
@@ -69,14 +69,16 @@ namespace yolox_cpp{
             ov::Tensor{ov::element::f32, this->input_shape_, reinterpret_cast<float *>(this->blob_.data())});
         infer_request_.infer();
 
-
         const auto &output_tensor = this->infer_request_.get_output_tensor();
         const float* net_pred = reinterpret_cast<float *>(output_tensor.data());
 
-        float scale = std::min(input_w_ / (frame.cols*1.0), input_h_ / (frame.rows*1.0));
+        const float scale = std::min(
+            static_cast<float>(this->input_w_) / static_cast<float>(frame.cols),
+            static_cast<float>(this->input_h_) / static_cast<float>(frame.rows)
+        );
+
         std::vector<Object> objects;
         decode_outputs(net_pred, this->grid_strides_, objects, this->bbox_conf_thresh_, scale, frame.cols, frame.rows);
         return objects;
     }
-
-}
\ No newline at end of file
+}
diff --git a/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp b/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp
index 4dd7151..f5c73ad 100644
--- a/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp
+++ b/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp
@@ -3,15 +3,15 @@
 namespace yolox_cpp
 {
 
-    YoloXTensorRT::YoloXTensorRT(file_name_t path_to_engine, int device,
-                                 float nms_th, float conf_th, std::string model_version,
+    YoloXTensorRT::YoloXTensorRT(const file_name_t &path_to_engine, int device,
+                                 float nms_th, float conf_th, const std::string &model_version,
                                  int num_classes, bool p6)
         : AbcYoloX(nms_th, conf_th, model_version, num_classes, p6),
           DEVICE_(device)
     {
         cudaSetDevice(this->DEVICE_);
         // create a model using the API directly and serialize it to a stream
-        char *trtModelStream{nullptr};
+        std::vector<char> trtModelStream;
         size_t size{0};
 
         std::ifstream file(path_to_engine, std::ios::binary);
@@ -20,24 +20,23 @@ namespace yolox_cpp
             file.seekg(0, file.end);
             size = file.tellg();
             file.seekg(0, file.beg);
-            trtModelStream = new char[size];
-            assert(trtModelStream);
-            file.read(trtModelStream, size);
+            trtModelStream.resize(size);
+            file.read(trtModelStream.data(), size);
             file.close();
         }
         else
         {
-            std::cerr << "invalid arguments path_to_engine: " << path_to_engine << std::endl;
-            return;
+            std::string msg = "invalid arguments path_to_engine: ";
+            msg += path_to_engine;
+            throw std::runtime_error(msg.c_str());
         }
 
         this->runtime_ = std::unique_ptr<IRuntime>(createInferRuntime(this->gLogger_));
         assert(this->runtime_ != nullptr);
-        this->engine_ = std::unique_ptr<ICudaEngine>(this->runtime_->deserializeCudaEngine(trtModelStream, size));
+        this->engine_ = std::unique_ptr<ICudaEngine>(this->runtime_->deserializeCudaEngine(trtModelStream.data(), size));
         assert(this->engine_ != nullptr);
         this->context_ = std::unique_ptr<IExecutionContext>(this->engine_->createExecutionContext());
         assert(this->context_ != nullptr);
-        delete[] trtModelStream;
 
         const auto input_name = this->engine_->getIOTensorName(this->inputIndex_);
         const auto input_dims = this->engine_->getTensorShape(input_name);
@@ -54,6 +53,10 @@ namespace yolox_cpp
             this->output_size_ *= output_dims.d[j];
         }
 
+        // allocate buffer
+        this->input_blob_.resize(this->input_h_ * this->input_w_ * 3);
+        this->output_blob_.resize(this->output_size_);
+
         // Pointers to input and output device buffers to pass to engine.
         // Engine requires exactly IEngine::getNbBindings() number of buffers.
         assert(this->engine_->getNbIOTensors() == 2);
@@ -69,8 +72,8 @@ namespace yolox_cpp
         assert(this->context_->setInputShape(input_name, input_dims));
         assert(this->context_->allInputDimensionsSpecified());
 
-        assert(this->context_->setTensorAddress(input_name, this->inference_buffers_[this->inputIndex_]));
-        assert(this->context_->setTensorAddress(output_name, this->inference_buffers_[this->outputIndex_]));
+        assert(this->context_->setInputTensorAddress(input_name, this->inference_buffers_[this->inputIndex_]));
+        assert(this->context_->setOutputTensorAddress(output_name, this->inference_buffers_[this->outputIndex_]));
 
         // Prepare GridAndStrides
         if (this->p6_)
@@ -93,37 +96,49 @@ namespace yolox_cpp
     {
         // preprocess
         auto pr_img = static_resize(frame);
-        float *input_blob = new float[pr_img.total() * 3];
-        blobFromImage(pr_img, input_blob);
+        blobFromImage(pr_img, input_blob_.data());
 
         // inference
-        float *output_blob = new float[this->output_size_];
-        this->doInference(input_blob, output_blob);
+        this->doInference(input_blob_.data(), output_blob_.data());
 
-        float scale = std::min(this->input_w_ / (frame.cols * 1.0), this->input_h_ / (frame.rows * 1.0));
+        // postprocess
+        const float scale = std::min(
+            static_cast<float>(this->input_w_) / static_cast<float>(frame.cols),
+            static_cast<float>(this->input_h_) / static_cast<float>(frame.rows)
+        );
 
         std::vector<Object> objects;
-        decode_outputs(output_blob, this->grid_strides_, objects, this->bbox_conf_thresh_, scale, frame.cols, frame.rows);
+        decode_outputs(
+            output_blob_.data(), this->grid_strides_, objects,
+            this->bbox_conf_thresh_, scale, frame.cols, frame.rows);
 
-        delete input_blob;
-        delete output_blob;
         return objects;
     }
 
-    void YoloXTensorRT::doInference(float *input, float *output)
+    void YoloXTensorRT::doInference(const float *input, float *output)
     {
         // Create stream
         cudaStream_t stream;
         CHECK(cudaStreamCreate(&stream));
 
         // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
-        CHECK(cudaMemcpyAsync(this->inference_buffers_[this->inputIndex_], input, 3 * this->input_h_ * this->input_w_ * sizeof(float), cudaMemcpyHostToDevice, stream));
-
-        bool success = context_->enqueueV3(stream);
+        CHECK(
+            cudaMemcpyAsync(
+                this->inference_buffers_[this->inputIndex_],
+                input,
+                3 * this->input_h_ * this->input_w_ * sizeof(float),
+                cudaMemcpyHostToDevice, stream));
+
+        bool success = context_->executeV2(this->inference_buffers_);
         if (!success)
             throw std::runtime_error("failed inference");
 
-        CHECK(cudaMemcpyAsync(output, this->inference_buffers_[this->outputIndex_], this->output_size_ * sizeof(float), cudaMemcpyDeviceToHost, stream));
+        CHECK(
+            cudaMemcpyAsync(
+                output,
+                this->inference_buffers_[this->outputIndex_],
+                this->output_size_ * sizeof(float),
+                cudaMemcpyDeviceToHost, stream));
 
         CHECK(cudaStreamSynchronize(stream));
 
diff --git a/yolox_ros_cpp/yolox_cpp/src/yolox_tflite.cpp b/yolox_ros_cpp/yolox_cpp/src/yolox_tflite.cpp
index 83a8e99..5182150 100644
--- a/yolox_ros_cpp/yolox_cpp/src/yolox_tflite.cpp
+++ b/yolox_ros_cpp/yolox_cpp/src/yolox_tflite.cpp
@@ -3,8 +3,8 @@
 namespace yolox_cpp
 {
 
-    YoloXTflite::YoloXTflite(file_name_t path_to_model, int num_threads,
-                             float nms_th, float conf_th, std::string model_version,
+    YoloXTflite::YoloXTflite(const file_name_t &path_to_model, int num_threads,
+                             float nms_th, float conf_th, const std::string &model_version,
                              int num_classes, bool p6, bool is_nchw)
         : AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), is_nchw_(is_nchw)
     {
@@ -25,8 +25,8 @@ namespace yolox_cpp
         status = this->interpreter_->SetNumThreads(num_threads);
         if (status != TfLiteStatus::kTfLiteOk)
         {
-            std::cerr << "Failed to SetNumThreads." << std::endl;
-            exit(1);
+            std::string msg = "Failed to SetNumThreads.";
+            throw std::runtime_error(msg.c_str());
         }
 
         // XNNPACK Delegate
@@ -36,8 +36,8 @@ namespace yolox_cpp
         status = this->interpreter_->ModifyGraphWithDelegate(this->delegate_);
         if (status != TfLiteStatus::kTfLiteOk)
         {
-            std::cerr << "Failed to ModifyGraphWithDelegate." << std::endl;
-            exit(1);
+            std::string msg = "Failed to ModifyGraphWithDelegate.";
+            throw std::runtime_error(msg.c_str());
         }
 
         // // GPU Delegate
@@ -67,8 +67,8 @@ namespace yolox_cpp
 
         if (this->interpreter_->AllocateTensors() != TfLiteStatus::kTfLiteOk)
         {
-            std::cerr << "Failed to allocate tensors." << std::endl;
-            exit(1);
+            std::string msg = "Failed to allocate tensors.";
+            throw std::runtime_error(msg.c_str());
         }
 
         {
@@ -97,7 +97,7 @@ namespace yolox_cpp
             {
                 this->input_size_ = sizeof(float);
             }
-            for (size_t i = 0; i < tensor->dims->size; i++)
+            for (int i = 0; i < tensor->dims->size; i++)
             {
                 this->input_size_ *= tensor->dims->data[i];
                 std::cout << "   - " << tensor->dims->data[i] << std::endl;
@@ -120,7 +120,7 @@ namespace yolox_cpp
             {
                 this->output_size_ = sizeof(float);
             }
-            for (size_t i = 0; i < tensor->dims->size; i++)
+            for (int i = 0; i < tensor->dims->size; i++)
             {
                 this->output_size_ *= tensor->dims->data[i];
                 std::cout << "   - " << tensor->dims->data[i] << std::endl;
@@ -166,10 +166,15 @@ namespace yolox_cpp
         }
 
         // postprocess
+        const float scale = std::min(
+            static_cast<float>(this->input_w_) / static_cast<float>(frame.cols),
+            static_cast<float>(this->input_h_) / static_cast<float>(frame.rows)
+        );
         std::vector<Object> objects;
-        float scale = std::min(this->input_w_ / (frame.cols * 1.0), this->input_h_ / (frame.rows * 1.0));
-        float *output_blob = this->interpreter_->typed_output_tensor<float>(0);
-        decode_outputs(output_blob, this->grid_strides_, objects, this->bbox_conf_thresh_, scale, frame.cols, frame.rows);
+        decode_outputs(
+            this->interpreter_->typed_output_tensor<float>(0),
+            this->grid_strides_, objects,
+            this->bbox_conf_thresh_, scale, frame.cols, frame.rows);
 
         return objects;
     }
diff --git a/yolox_ros_cpp/yolox_param/package.xml b/yolox_ros_cpp/yolox_param/package.xml
index 385808e..cc3744d 100644
--- a/yolox_ros_cpp/yolox_param/package.xml
+++ b/yolox_ros_cpp/yolox_param/package.xml
@@ -19,4 +19,4 @@
   <export>
     <build_type>ament_cmake</build_type>
   </export>
-</package>
\ No newline at end of file
+</package>
diff --git a/yolox_ros_cpp/yolox_param/src/yolox_parameters.yaml b/yolox_ros_cpp/yolox_param/src/yolox_parameters.yaml
index a972b85..bb71c40 100644
--- a/yolox_ros_cpp/yolox_param/src/yolox_parameters.yaml
+++ b/yolox_ros_cpp/yolox_param/src/yolox_parameters.yaml
@@ -93,4 +93,4 @@ yolox_parameters:
   publish_resized_image:
     type: bool
     description: "Enable or disable resized image."
-    default_value: false
\ No newline at end of file
+    default_value: false
diff --git a/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp b/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp
index 2bde92a..4253237 100644
--- a/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp
+++ b/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp
@@ -3,7 +3,11 @@
 #include <cmath>
 #include <chrono>
 
+#if __has_include(<cv_bridge/cv_bridge.hpp>)
 #include <cv_bridge/cv_bridge.hpp>
+#else
+#include <cv_bridge/cv_bridge.h>
+#endif
 #include <image_transport/image_transport.hpp>
 #include <rclcpp/rclcpp.hpp>
 #include <rclcpp_components/register_node_macro.hpp>
diff --git a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_onnxruntime.launch.py b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_onnxruntime.launch.py
index df34cdb..61d0802 100644
--- a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_onnxruntime.launch.py
+++ b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_onnxruntime.launch.py
@@ -52,26 +52,26 @@ def generate_launch_description():
             description='yolox model version.'
         ),
         DeclareLaunchArgument(
-            'onnxruntime/use_cuda',
+            'onnxruntime_use_cuda',
             default_value='true',
             description='onnxruntime use cuda.'
         ),
         DeclareLaunchArgument(
-            'onnxruntime/device_id',
+            'onnxruntime_device_id',
             default_value='0',
             description='onnxruntime gpu device id.'
         ),
         DeclareLaunchArgument(
-            'onnxruntime/use_parallel',
+            'onnxruntime_use_parallel',
             default_value='false',
             description='if use_parallel is true, you can set inter_op_num_threads.'
         ),
         DeclareLaunchArgument(
-            'onnxruntime/inter_op_num_threads',
+            'onnxruntime_inter_op_num_threads',
             default_value='1'
         ),
         DeclareLaunchArgument(
-            'onnxruntime/intra_op_num_threads',
+            'onnxruntime_intra_op_num_threads',
             default_value='1',
             description='ontrols the number of threads to use to run the model.'
         ),
@@ -141,11 +141,11 @@ def generate_launch_description():
                     'num_classes': LaunchConfiguration('num_classes'),
                     'model_type': 'onnxruntime',
                     'model_version': LaunchConfiguration('model_version'),
-                    'onnxruntime/use_cuda': LaunchConfiguration('onnxruntime/use_cuda'),
-                    'onnxruntime/device_id': LaunchConfiguration('onnxruntime/device_id'),
-                    'onnxruntime/use_parallel': LaunchConfiguration('onnxruntime/use_parallel'),
-                    'onnxruntime/inter_op_num_threads': LaunchConfiguration('onnxruntime/inter_op_num_threads'),
-                    'onnxruntime/intra_op_num_threads': LaunchConfiguration('onnxruntime/intra_op_num_threads'),
+                    'onnxruntime_use_cuda': LaunchConfiguration('onnxruntime_use_cuda'),
+                    'onnxruntime_device_id': LaunchConfiguration('onnxruntime_device_id'),
+                    'onnxruntime_use_parallel': LaunchConfiguration('onnxruntime_use_parallel'),
+                    'onnxruntime_inter_op_num_threads': LaunchConfiguration('onnxruntime_inter_op_num_threads'),
+                    'onnxruntime_intra_op_num_threads': LaunchConfiguration('onnxruntime_intra_op_num_threads'),
                     'conf': LaunchConfiguration('conf'),
                     'nms': LaunchConfiguration('nms'),
                     'imshow_isshow': LaunchConfiguration('imshow_isshow'),
diff --git a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_openvino.launch.py b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_openvino.launch.py
index 1395507..8b18767 100644
--- a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_openvino.launch.py
+++ b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_openvino.launch.py
@@ -37,7 +37,7 @@ def generate_launch_description():
             description='yolox model version.'
         ),
         DeclareLaunchArgument(
-            'openvino/device',
+            'openvino_device',
             default_value='CPU',
             description='model device. CPU, GPU, MYRIAD, etc...'
         ),
@@ -107,7 +107,7 @@ def generate_launch_description():
                     'num_classes': LaunchConfiguration('num_classes'),
                     'model_type': 'openvino',
                     'model_version': LaunchConfiguration('model_version'),
-                    'openvino/device': LaunchConfiguration('openvino/device'),
+                    'openvino_device': LaunchConfiguration('openvino_device'),
                     'conf': LaunchConfiguration('conf'),
                     'nms': LaunchConfiguration('nms'),
                     'imshow_isshow': LaunchConfiguration('imshow_isshow'),
diff --git a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tensorrt.launch.py b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tensorrt.launch.py
index c011aeb..fd6eea8 100644
--- a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tensorrt.launch.py
+++ b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tensorrt.launch.py
@@ -14,7 +14,7 @@ def generate_launch_description():
         ),
         DeclareLaunchArgument(
             'model_path',
-            default_value='./install/yolox_ros_cpp/share/yolox_ros_cpp/weights/tensorrt/yolox_nano.trt',
+            default_value='./src/YOLOX-ROS/weights/tensorrt/yolox_tiny.trt',
             description='yolox model path.'
         ),
         DeclareLaunchArgument(
@@ -33,7 +33,7 @@ def generate_launch_description():
             description='num classes.'
         ),
         DeclareLaunchArgument(
-            'tensorrt/device',
+            'tensorrt_device',
             default_value='0',
             description='GPU index. Set in string type. ex 0'
         ),
@@ -108,7 +108,7 @@ def generate_launch_description():
                     'num_classes': LaunchConfiguration('num_classes'),
                     'model_type': 'tensorrt',
                     'model_version': LaunchConfiguration('model_version'),
-                    'tensorrt/device': LaunchConfiguration('tensorrt/device'),
+                    'tensorrt_device': LaunchConfiguration('tensorrt_device'),
                     'conf': LaunchConfiguration('conf'),
                     'nms': LaunchConfiguration('nms'),
                     'imshow_isshow': LaunchConfiguration('imshow_isshow'),
diff --git a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tflite.launch.py b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tflite.launch.py
index 0f3781f..023f972 100644
--- a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tflite.launch.py
+++ b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tflite.launch.py
@@ -43,7 +43,7 @@ def generate_launch_description():
             description='yolox model version.'
         ),
         DeclareLaunchArgument(
-            'tflite/num_threads',
+            'tflite_num_threads',
             default_value='1',
             description='tflite num_threads.'
         ),
@@ -114,7 +114,7 @@ def generate_launch_description():
                     'is_nchw': LaunchConfiguration('is_nchw'),
                     'model_type': 'tflite',
                     'model_version': LaunchConfiguration('model_version'),
-                    'tflite/num_threads': LaunchConfiguration('tflite/num_threads'),
+                    'tflite_num_threads': LaunchConfiguration('tflite_num_threads'),
                     'conf': LaunchConfiguration('conf'),
                     'nms': LaunchConfiguration('nms'),
                     'imshow_isshow': LaunchConfiguration('imshow_isshow'),
diff --git a/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp b/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp
index e501cb4..a5f5bcf 100644
--- a/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp
+++ b/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp
@@ -120,8 +120,8 @@ namespace yolox_ros_cpp
         auto objects = this->yolox_->inference(frame);
         auto end = std::chrono::system_clock::now();
 
-        auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - now);
-        RCLCPP_INFO(this->get_logger(), "Inference time: %5ld ms", elapsed.count());
+        auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end - now);
+        RCLCPP_INFO(this->get_logger(), "Inference time: %5ld us", elapsed.count());
 
         yolox_cpp::utils::draw_objects(frame, objects, this->class_names_);
         if (this->params_.imshow_isshow)