From 82f191b0e779e5468ac4594921027d30068335a3 Mon Sep 17 00:00:00 2001
From: Fang Xu
Date: Mon, 30 Oct 2023 16:24:36 +0800
Subject: [PATCH] choose Pcore to compile model for GPU plugin (#20472)

* choose Pcore to compile model for GPU plugin

* provide function to update executor config

* set callback executor to nullptr for GPU plugin

* fix code style

* fix warning

* optimize duplicate code

* set callback executor to nullptr for another gpu compile_model

* add description for new function

* add smoke test

* fix code style

* modify function definition

---------

Co-authored-by: Wanglei Shen
---
 .../runtime/threading/istreams_executor.hpp   |  13 ++
 .../src/dev/threading/istreams_executor.cpp   | 103 +++++++++++
 .../unit/update_executor_config_test.cpp      | 163 ++++++++++++++++++
 src/plugins/intel_gpu/src/graph/program.cpp   |  26 +--
 .../intel_gpu/src/plugin/compiled_model.cpp   |   6 +-
 5 files changed, 288 insertions(+), 23 deletions(-)
 create mode 100644 src/inference/tests/unit/update_executor_config_test.cpp

diff --git a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp
index 4167da60da00de..738377ddce4d1e 100644
--- a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp
+++ b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp
@@ -150,6 +150,19 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor {
               _threadPreferredCoreType(threadPreferredCoreType),
               _streams_info_table{streamsInfoTable},
               _cpu_reservation{cpuReservation} {}
+
+        /**
+         * @brief Rebuild _streams_info_table and related configuration according to user-specified parameters, bind
+         * threads to cpu cores if cpu_pinning is true.
+         * @param stream_nums Number of streams specified by user
+         * @param threads_per_stream Number of threads per stream specified by user
+         * @param core_type Cpu type (Big/Little/Any) specified by user
+         * @param cpu_pinning Whether to bind the threads to cpu cores
+         */
+        void update_executor_config(int stream_nums,
+                                    int threads_per_stream,
+                                    PreferredCoreType core_type,
+                                    bool cpu_pinning);
     };
 
     /**
diff --git a/src/inference/src/dev/threading/istreams_executor.cpp b/src/inference/src/dev/threading/istreams_executor.cpp
index 92d297a62ecb30..518cdd08c69c7f 100644
--- a/src/inference/src/dev/threading/istreams_executor.cpp
+++ b/src/inference/src/dev/threading/istreams_executor.cpp
@@ -553,5 +553,108 @@ IStreamsExecutor::Config IStreamsExecutor::Config::reserve_cpu_threads(const ISt
     return config;
 }
 
+void IStreamsExecutor::Config::update_executor_config(int stream_nums,
+                                                      int threads_per_stream,
+                                                      IStreamsExecutor::Config::PreferredCoreType core_type,
+                                                      bool cpu_pinning) {
+    const auto proc_type_table = ov::get_proc_type_table();
+
+    if (proc_type_table.empty()) {
+        return;
+    }
+
+    const auto total_num_cores = proc_type_table[0][ALL_PROC];
+    const auto total_num_big_cores = proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC];
+    const auto total_num_little_cores = proc_type_table[0][EFFICIENT_CORE_PROC];
+
+    int num_cores = total_num_cores;
+    if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
+        num_cores = total_num_big_cores;
+    } else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
+        num_cores = total_num_little_cores;
+    }
+
+    int streams = std::min(stream_nums, num_cores);
+
+    if (streams <= 0) {
+        return;
+    }
+
+    _streams = streams;
+    _threadPreferredCoreType = core_type;
+    _threadsPerStream = threads_per_stream;
+
+    // rebuild stream_info_table based on core type; its rows never describe more than _streams streams
+    _streams_info_table.clear();
+    std::vector<int> stream_info(ov::CPU_STREAMS_TABLE_SIZE, 0);
+    stream_info[ov::THREADS_PER_STREAM] = _threadsPerStream;
+    stream_info[ov::STREAM_NUMA_NODE_ID] = 0;
+    stream_info[ov::STREAM_SOCKET_ID] = 0;
+    if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
+        if (proc_type_table[0][ov::MAIN_CORE_PROC] < _streams) {
+            stream_info[ov::NUMBER_OF_STREAMS] = proc_type_table[0][ov::MAIN_CORE_PROC];
+            stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC;
+            _streams_info_table.push_back(stream_info);
+            stream_info[ov::NUMBER_OF_STREAMS] = _streams - proc_type_table[0][ov::MAIN_CORE_PROC];
+            stream_info[ov::PROC_TYPE] = ov::HYPER_THREADING_PROC;
+            _streams_info_table.push_back(stream_info);
+        } else {
+            stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC;
+            stream_info[ov::NUMBER_OF_STREAMS] = _streams;
+            _streams_info_table.push_back(stream_info);
+        }
+    } else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
+        stream_info[ov::PROC_TYPE] = ov::EFFICIENT_CORE_PROC;
+        stream_info[ov::NUMBER_OF_STREAMS] = _streams;
+        _streams_info_table.push_back(stream_info);
+    } else {
+        int total_streams = 0;
+        if (proc_type_table.size() == 1) {
+            for (int i = ov::MAIN_CORE_PROC; i <= ov::HYPER_THREADING_PROC; i++) {
+                if (proc_type_table[0][i] > 0) {
+                    stream_info[ov::NUMBER_OF_STREAMS] =
+                        (total_streams + proc_type_table[0][i] > _streams ? _streams - total_streams
+                                                                          : proc_type_table[0][i]);
+                    stream_info[ov::PROC_TYPE] = i;
+                    stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[0][PROC_NUMA_NODE_ID];
+                    stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[0][PROC_SOCKET_ID];
+                    _streams_info_table.push_back(stream_info);
+                    total_streams += stream_info[ov::NUMBER_OF_STREAMS];
+                }
+                if (total_streams >= _streams)
+                    break;
+            }
+        } else {
+            for (size_t i = 1; i < proc_type_table.size(); i++) {
+                // NOTE(review): unlike the single-row loop above, this one skips HYPER_THREADING_PROC - confirm.
+                for (int j = ov::MAIN_CORE_PROC; j < ov::HYPER_THREADING_PROC; j++) {
+                    if (proc_type_table[i][j] > 0) {
+                        stream_info[ov::NUMBER_OF_STREAMS] =
+                            (total_streams + proc_type_table[i][j] > _streams ? _streams - total_streams
+                                                                              : proc_type_table[i][j]);
+                        stream_info[ov::PROC_TYPE] = j;
+                        stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[i][PROC_NUMA_NODE_ID];
+                        stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[i][PROC_SOCKET_ID];
+                        _streams_info_table.push_back(stream_info);
+                        total_streams += stream_info[ov::NUMBER_OF_STREAMS];
+                    }
+                    if (total_streams >= _streams)
+                        break;
+                }
+                if (total_streams >= _streams)
+                    break;
+            }
+        }
+    }
+
+    if (cpu_pinning) {
+        _cpu_reservation = cpu_pinning;
+        auto new_config = reserve_cpu_threads(*this);
+        _stream_processor_ids = new_config._stream_processor_ids;
+        _streams = new_config._streams;
+        _threads = new_config._threads;
+    }
+}
+
 }  // namespace threading
 }  // namespace ov
diff --git a/src/inference/tests/unit/update_executor_config_test.cpp b/src/inference/tests/unit/update_executor_config_test.cpp
new file mode 100644
index 00000000000000..a660dfff0597ae
--- /dev/null
+++ b/src/inference/tests/unit/update_executor_config_test.cpp
@@ -0,0 +1,163 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <common_test_utils/test_common.hpp>
+
+#include "openvino/runtime/threading/istreams_executor.hpp"
+#include "os/cpu_map_info.hpp"
+
+using namespace testing;
+using namespace ov;
+using namespace threading;
+
+namespace {
+
+#if defined(__linux__) || defined(_WIN32)
+
+struct UpdateExecutorConfigTestCase {
+    ov::threading::IStreamsExecutor::Config _config;
+    std::vector<std::vector<int>> _proc_type_table;
+    std::vector<std::vector<int>> _cpu_mapping_table;
+    int _num_streams;
+    int _threads_per_stream;
+    ov::threading::IStreamsExecutor::Config::PreferredCoreType _core_type;
+    bool _cpu_pinning;
+    std::vector<std::vector<int>> _streams_info_table;
+    std::vector<std::vector<int>> _stream_processors;
+};
+
+class UpdateExecutorConfigTest : public ov::test::TestsCommon,
+                                 public testing::WithParamInterface<std::tuple<UpdateExecutorConfigTestCase>> {
+public:
+    void SetUp() override {
+        auto test_data = std::get<0>(GetParam());
+
+        CPU& cpu = cpu_info();
+        cpu._org_proc_type_table = test_data._proc_type_table;
+        cpu._proc_type_table = test_data._proc_type_table;
+        cpu._cpu_mapping_table = test_data._cpu_mapping_table;
+        cpu._numa_nodes = 1;
+
+        test_data._config.update_executor_config(test_data._num_streams,
+                                                 test_data._threads_per_stream,
+                                                 test_data._core_type,
+                                                 test_data._cpu_pinning);
+
+        ASSERT_EQ(test_data._num_streams, test_data._config._streams);
+        ASSERT_EQ(test_data._threads_per_stream, test_data._config._threadsPerStream);
+        ASSERT_EQ(test_data._core_type, test_data._config._threadPreferredCoreType);
+        ASSERT_EQ(test_data._cpu_pinning, test_data._config._cpu_reservation);
+        ASSERT_EQ(test_data._streams_info_table, test_data._config._streams_info_table);
+        ASSERT_EQ(test_data._stream_processors, test_data._config._stream_processor_ids);
+    }
+};
+
+UpdateExecutorConfigTestCase _update_num_streams = {
+    ov::threading::IStreamsExecutor::Config{"update num streams test"},  // param[in]: initial configuration
+    // param[in]: proc_type_table, {total processors, number of physical processors, number of Efficient processors,
+    // number of hyper threading processors}
+    {
+        {12, 6, 0, 6, 0, 0},
+    },
+    // param[in]: cpu_mapping_table, {PROCESSOR_ID, NUMA_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, Used}
+    {
+        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
+        {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
+        {2, 0, 0, 1, MAIN_CORE_PROC, 2, -1},
+        {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
+        {4, 0, 0, 2, MAIN_CORE_PROC, 4, -1},
+        {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
+        {6, 0, 0, 3, MAIN_CORE_PROC, 6, -1},
+        {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
+        {8, 0, 0, 4, MAIN_CORE_PROC, 8, -1},
+        {9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1},
+        {10, 0, 0, 5, MAIN_CORE_PROC, 10, -1},
+        {11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1},
+    },
+    4,                                             // param[in]: the number of streams
+    1,                                             // param[in]: the number of threads per stream
+    ov::threading::IStreamsExecutor::Config::ANY,  // param[in]: specified cpu core type
+    false,                                         // param[in]: specified cpu pinning
+    // param[out]: streams_info_table, {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM, STREAM_NUMA_NODE_ID,
+    // STREAM_SOCKET_ID}
+    {
+        {4, MAIN_CORE_PROC, 1, 0, 0},
+    },
+    // param[out]: stream_processors, the list of processor ids on each stream.
+    {},
+};
+
+UpdateExecutorConfigTestCase _update_core_type = {
+    ov::threading::IStreamsExecutor::Config{"update core type test"},
+    {
+        {24, 8, 8, 8, 0, 0},
+    },
+    {
+        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},        {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
+        {2, 0, 0, 1, MAIN_CORE_PROC, 2, -1},        {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
+        {4, 0, 0, 2, MAIN_CORE_PROC, 4, -1},        {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
+        {6, 0, 0, 3, MAIN_CORE_PROC, 6, -1},        {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
+        {8, 0, 0, 4, MAIN_CORE_PROC, 8, -1},        {9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1},
+        {10, 0, 0, 5, MAIN_CORE_PROC, 10, -1},      {11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1},
+        {12, 0, 0, 6, MAIN_CORE_PROC, 12, -1},      {13, 0, 0, 6, HYPER_THREADING_PROC, 13, -1},
+        {14, 0, 0, 7, MAIN_CORE_PROC, 14, -1},      {15, 0, 0, 7, HYPER_THREADING_PROC, 15, -1},
+        {16, 0, 0, 8, EFFICIENT_CORE_PROC, 16, -1}, {17, 0, 0, 9, EFFICIENT_CORE_PROC, 17, -1},
+        {18, 0, 0, 10, EFFICIENT_CORE_PROC, 18, -1}, {19, 0, 0, 11, EFFICIENT_CORE_PROC, 19, -1},
+        {20, 0, 0, 12, EFFICIENT_CORE_PROC, 20, -1}, {21, 0, 0, 13, EFFICIENT_CORE_PROC, 21, -1},
+        {22, 0, 0, 14, EFFICIENT_CORE_PROC, 22, -1}, {23, 0, 0, 15, EFFICIENT_CORE_PROC, 23, -1},
+    },
+    8,
+    1,
+    ov::threading::IStreamsExecutor::Config::LITTLE,
+    false,
+    {
+        {8, EFFICIENT_CORE_PROC, 1, 0, 0},
+    },
+    {},
+};
+
+UpdateExecutorConfigTestCase _update_cpu_pinning = {
+    ov::threading::IStreamsExecutor::Config{"update cpu pinning test"},
+    {
+        {8, 4, 0, 4, 0, 0},
+    },
+    {
+        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
+        {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
+        {2, 0, 0, 1, MAIN_CORE_PROC, 2, -1},
+        {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
+        {4, 0, 0, 2, MAIN_CORE_PROC, 4, -1},
+        {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
+        {6, 0, 0, 3, MAIN_CORE_PROC, 6, -1},
+        {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
+    },
+    8,
+    1,
+    ov::threading::IStreamsExecutor::Config::ANY,
+    true,
+    {
+        {4, MAIN_CORE_PROC, 1, 0, 0},
+        {4, HYPER_THREADING_PROC, 1, 0, 0},
+    },
+    {
+        {0},
+        {2},
+        {4},
+        {6},
+        {1},
+        {3},
+        {5},
+        {7},
+    },
+};
+
+TEST_P(UpdateExecutorConfigTest, UpdateExecutorConfig) {}
+
+INSTANTIATE_TEST_SUITE_P(smoke_UpdateExecutorConfig,
+                         UpdateExecutorConfigTest,
+                         testing::Values(_update_num_streams, _update_core_type, _update_cpu_pinning));
+#endif
+}  // namespace
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index dde29dc1e32504..9bd5d57090c7ef 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -3,6 +3,7 @@
 //
 
 #include "openvino/runtime/system_conf.hpp"
+#include "openvino/runtime/threading/cpu_streams_info.hpp"
 
 #include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/runtime/engine.hpp"
@@ -104,26 +105,6 @@
 using namespace cldnn;
 using namespace ov::intel_gpu;
 
-static void adjust_num_cores(ov::threading::IStreamsExecutor::Config& config) {
-    if (ov::get_available_cores_types().size() == 1) {
-        return;
-    }
-
-    const auto total_num_cores = ov::get_number_of_logical_cpu_cores();
-    const auto total_num_big_cores = ov::get_number_of_logical_cpu_cores(true);
-    const auto total_num_little_cores = total_num_cores - total_num_big_cores;
-    auto core_type = config._threadPreferredCoreType;
-
-    int num_cores = total_num_cores;
-    if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
-        num_cores = total_num_big_cores;
-    } else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
-        num_cores = total_num_little_cores;
-    }
-
-    config._streams = std::min(config._streams, num_cores);
-}
-
 static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) {
     ov::threading::IStreamsExecutor::Config task_executor_config(tags, 1);
     task_executor_config._streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads);
@@ -135,7 +116,10 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E
         default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority);
     }
 
-    adjust_num_cores(task_executor_config);
+    task_executor_config.update_executor_config(task_executor_config._streams,
+                                                1,
+                                                task_executor_config._threadPreferredCoreType,
+                                                false);
 
     return task_executor_config;
 }
diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
index 60d9a66bca3122..48d75b4640cf6b 100644
--- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
+++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -63,7 +63,8 @@ CompiledModel::CompiledModel(std::shared_ptr<ov::Model> model,
     : ov::ICompiledModel(model,
                          plugin,
                          wrap_if_old_api(context, plugin->is_new_api()),
-                         create_task_executor(plugin, config))
+                         create_task_executor(plugin, config),
+                         nullptr)
     , m_context(context)
     , m_config(config)
     , m_wait_executor(std::make_shared<ov::threading::CPUStreamsExecutor>(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"}))
@@ -86,7 +87,8 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer ib,
     : ov::ICompiledModel(nullptr,
                          plugin,
                          wrap_if_old_api(context, plugin->is_new_api()),
-                         create_task_executor(plugin, config))
+                         create_task_executor(plugin, config),
+                         nullptr)
     , m_context(context)
     , m_config(config)
     , m_wait_executor(std::make_shared<ov::threading::CPUStreamsExecutor>(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"}))