diff --git a/deploy/cpp_infer/CMakeLists.txt b/deploy/cpp_infer/CMakeLists.txt index 16ad72e762..c8b4e3b357 100644 --- a/deploy/cpp_infer/CMakeLists.txt +++ b/deploy/cpp_infer/CMakeLists.txt @@ -133,7 +133,7 @@ if(WITH_MKL) if (WIN32) set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) else () - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libdnnl.so.3) endif () endif() else() diff --git a/deploy/cpp_infer/include/args.h b/deploy/cpp_infer/include/args.h index 31c8c26cc8..82e69a1ba3 100644 --- a/deploy/cpp_infer/include/args.h +++ b/deploy/cpp_infer/include/args.h @@ -18,6 +18,7 @@ // common args DECLARE_bool(use_gpu); +DECLARE_bool(use_mlu); DECLARE_bool(use_tensorrt); DECLARE_int32(gpu_id); DECLARE_int32(gpu_mem); diff --git a/deploy/cpp_infer/include/ocr_cls.h b/deploy/cpp_infer/include/ocr_cls.h index f5a0356573..3762b79324 100644 --- a/deploy/cpp_infer/include/ocr_cls.h +++ b/deploy/cpp_infer/include/ocr_cls.h @@ -25,12 +25,14 @@ namespace PaddleOCR { class Classifier { public: explicit Classifier(const std::string &model_dir, const bool &use_gpu, + const bool &use_mlu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, const bool &use_mkldnn, const double &cls_thresh, const bool &use_tensorrt, const std::string &precision, const int &cls_batch_num) { this->use_gpu_ = use_gpu; + this->use_mlu_ = use_mlu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; @@ -55,6 +57,7 @@ class Classifier { std::shared_ptr predictor_; bool use_gpu_ = false; + bool use_mlu_ = false; int gpu_id_ = 0; int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h index ca069c5ad9..680b8e6c74 100644 --- a/deploy/cpp_infer/include/ocr_det.h +++ b/deploy/cpp_infer/include/ocr_det.h @@ -25,6 +25,7 @@ namespace PaddleOCR { class DBDetector { public: explicit DBDetector(const 
std::string &model_dir, const bool &use_gpu, + const bool &use_mlu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, const bool &use_mkldnn, const std::string &limit_type, @@ -35,6 +36,7 @@ class DBDetector { const bool &use_dilation, const bool &use_tensorrt, const std::string &precision) { this->use_gpu_ = use_gpu; + this->use_mlu_ = use_mlu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; @@ -66,6 +68,7 @@ class DBDetector { std::shared_ptr predictor_; bool use_gpu_ = false; + bool use_mlu_ = false; int gpu_id_ = 0; int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h index f3712cb3ea..c6948bf4be 100644 --- a/deploy/cpp_infer/include/ocr_rec.h +++ b/deploy/cpp_infer/include/ocr_rec.h @@ -25,6 +25,7 @@ namespace PaddleOCR { class CRNNRecognizer { public: explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu, + const bool &use_mlu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, const bool &use_mkldnn, const std::string &label_path, @@ -33,6 +34,7 @@ class CRNNRecognizer { const int &rec_batch_num, const int &rec_img_h, const int &rec_img_w) { this->use_gpu_ = use_gpu; + this->use_mlu_ = use_mlu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; @@ -63,6 +65,7 @@ class CRNNRecognizer { std::shared_ptr predictor_; bool use_gpu_ = false; + bool use_mlu_ = false; int gpu_id_ = 0; int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; diff --git a/deploy/cpp_infer/include/structure_layout.h b/deploy/cpp_infer/include/structure_layout.h index d1f488861a..f7332cbe76 100644 --- a/deploy/cpp_infer/include/structure_layout.h +++ b/deploy/cpp_infer/include/structure_layout.h @@ -25,13 +25,14 @@ namespace PaddleOCR { class StructureLayoutRecognizer { public: 
explicit StructureLayoutRecognizer( - const std::string &model_dir, const bool &use_gpu, const int &gpu_id, + const std::string &model_dir, const bool &use_gpu, const bool &use_mlu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, const bool &use_mkldnn, const std::string &label_path, const bool &use_tensorrt, const std::string &precision, const double &layout_score_threshold, const double &layout_nms_threshold) { this->use_gpu_ = use_gpu; + this->use_mlu_ = use_mlu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; @@ -54,6 +55,7 @@ class StructureLayoutRecognizer { std::shared_ptr predictor_; bool use_gpu_ = false; + bool use_mlu_ = false; int gpu_id_ = 0; int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; diff --git a/deploy/cpp_infer/include/structure_table.h b/deploy/cpp_infer/include/structure_table.h index 7664ec52a5..61c99a6737 100644 --- a/deploy/cpp_infer/include/structure_table.h +++ b/deploy/cpp_infer/include/structure_table.h @@ -25,13 +25,14 @@ namespace PaddleOCR { class StructureTableRecognizer { public: explicit StructureTableRecognizer( - const std::string &model_dir, const bool &use_gpu, const int &gpu_id, + const std::string &model_dir, const bool &use_gpu, const bool &use_mlu,const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, const bool &use_mkldnn, const std::string &label_path, const bool &use_tensorrt, const std::string &precision, const int &table_batch_num, const int &table_max_len, const bool &merge_no_span_structure) { this->use_gpu_ = use_gpu; + this->use_mlu_ = use_mlu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; @@ -58,6 +59,7 @@ class StructureTableRecognizer { std::shared_ptr predictor_; bool use_gpu_ = false; + bool use_mlu_ = false; int gpu_id_ = 0; int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; diff --git 
a/deploy/cpp_infer/src/args.cpp b/deploy/cpp_infer/src/args.cpp index 42ef4cea65..d4de903115 100644 --- a/deploy/cpp_infer/src/args.cpp +++ b/deploy/cpp_infer/src/args.cpp @@ -16,6 +16,7 @@ // common args DEFINE_bool(use_gpu, false, "Infering with GPU or CPU."); +DEFINE_bool(use_mlu, false, "Infering with MLU or CPU."); DEFINE_bool(use_tensorrt, false, "Whether use tensorrt."); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute."); DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU."); diff --git a/deploy/cpp_infer/src/ocr_cls.cpp b/deploy/cpp_infer/src/ocr_cls.cpp index 8c1897598c..171aa7460c 100644 --- a/deploy/cpp_infer/src/ocr_cls.cpp +++ b/deploy/cpp_infer/src/ocr_cls.cpp @@ -123,7 +123,9 @@ void Classifier::LoadModel(const std::string &model_dir) { config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true); } } - } else { + } else if (this->use_mlu_) { + config.EnableCustomDevice("mlu", this->gpu_id_); + } else { config.DisableGpu(); if (this->use_mkldnn_) { config.EnableMKLDNN(); diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index 23f36e8897..1747d6dc86 100644 --- a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -39,6 +39,8 @@ void DBDetector::LoadModel(const std::string &model_dir) { config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true); } } + } else if (this->use_mlu_) { + config.EnableCustomDevice("mlu", this->gpu_id_); } else { config.DisableGpu(); if (this->use_mkldnn_) { diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp index da1822af31..7f4b9569d0 100644 --- a/deploy/cpp_infer/src/ocr_rec.cpp +++ b/deploy/cpp_infer/src/ocr_rec.cpp @@ -152,6 +152,8 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true); } } + } else if (this->use_mlu_) { + config.EnableCustomDevice("mlu", this->gpu_id_); } else { config.DisableGpu(); if (this->use_mkldnn_) { diff 
--git a/deploy/cpp_infer/src/paddleocr.cpp b/deploy/cpp_infer/src/paddleocr.cpp index e0956474d1..82f1a10ade 100644 --- a/deploy/cpp_infer/src/paddleocr.cpp +++ b/deploy/cpp_infer/src/paddleocr.cpp @@ -22,7 +22,7 @@ namespace PaddleOCR { PPOCR::PPOCR() { if (FLAGS_det) { this->detector_.reset(new DBDetector( - FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, + FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_use_mlu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_limit_type, FLAGS_limit_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, FLAGS_det_db_score_mode, FLAGS_use_dilation, @@ -31,13 +31,13 @@ PPOCR::PPOCR() { if (FLAGS_cls && FLAGS_use_angle_cls) { this->classifier_.reset(new Classifier( - FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, + FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_use_mlu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_cls_thresh, FLAGS_use_tensorrt, FLAGS_precision, FLAGS_cls_batch_num)); } if (FLAGS_rec) { this->recognizer_.reset(new CRNNRecognizer( - FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, + FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_use_mlu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_rec_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num, FLAGS_rec_img_h, FLAGS_rec_img_w)); diff --git a/deploy/cpp_infer/src/paddlestructure.cpp b/deploy/cpp_infer/src/paddlestructure.cpp index bde687e2c2..9ad0bde65b 100644 --- a/deploy/cpp_infer/src/paddlestructure.cpp +++ b/deploy/cpp_infer/src/paddlestructure.cpp @@ -22,14 +22,14 @@ namespace PaddleOCR { PaddleStructure::PaddleStructure() { if (FLAGS_layout) { this->layout_model_.reset(new StructureLayoutRecognizer( - FLAGS_layout_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, + FLAGS_layout_model_dir, FLAGS_use_gpu, FLAGS_use_mlu,FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, 
FLAGS_layout_dict_path, FLAGS_use_tensorrt, FLAGS_precision, FLAGS_layout_score_threshold, FLAGS_layout_nms_threshold)); } if (FLAGS_table) { this->table_model_.reset(new StructureTableRecognizer( - FLAGS_table_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, + FLAGS_table_model_dir, FLAGS_use_gpu, FLAGS_use_mlu,FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_table_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision, FLAGS_table_batch_num, FLAGS_table_max_len, FLAGS_merge_no_span_structure)); diff --git a/deploy/cpp_infer/src/structure_layout.cpp b/deploy/cpp_infer/src/structure_layout.cpp index 01531bca29..177ed1124e 100644 --- a/deploy/cpp_infer/src/structure_layout.cpp +++ b/deploy/cpp_infer/src/structure_layout.cpp @@ -126,6 +126,8 @@ void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) { config.EnableTunedTensorRtDynamicShape("./trt_layout_shape.txt", true); } } + } else if (this->use_mlu_) { + config.EnableCustomDevice("mlu", this->gpu_id_); } else { config.DisableGpu(); if (this->use_mkldnn_) { diff --git a/deploy/cpp_infer/src/structure_table.cpp b/deploy/cpp_infer/src/structure_table.cpp index cae6ad8655..9913817855 100644 --- a/deploy/cpp_infer/src/structure_table.cpp +++ b/deploy/cpp_infer/src/structure_table.cpp @@ -139,6 +139,8 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) { config.EnableTunedTensorRtDynamicShape("./trt_table_shape.txt", true); } } + } else if (this->use_mlu_) { + config.EnableCustomDevice("mlu", this->gpu_id_); } else { config.DisableGpu(); if (this->use_mkldnn_) {