Skip to content

Commit

Permalink
choose Pcore to compile model for GPU plugin (openvinotoolkit#20472)
Browse files Browse the repository at this point in the history
* choose Pcore to compile model for GPU plugin

* provide function to update executor config

* set callback executor to nullptr for GPU plugin

* fix code style

* fix warning

* optimize duplicate code

* set callback executor to nullptr for another gpu compile_model

* add description for new function

* add smoke test

* fix code style

* modify function definition

---------

Co-authored-by: Wanglei Shen <[email protected]>
  • Loading branch information
xufang-lisa and wangleis authored Oct 30, 2023
1 parent cec6535 commit 82f191b
Show file tree
Hide file tree
Showing 5 changed files with 289 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,19 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor {
_threadPreferredCoreType(threadPreferredCoreType),
_streams_info_table{streamsInfoTable},
_cpu_reservation{cpuReservation} {}

/**
 * @brief Modify _streams_info_table and related configuration according to user-specified parameters, and
 * reserve processors for the streams (via reserve_cpu_threads()) if cpu_pinning is true.
 *
 * The configuration is left untouched when no processor type table is available, or when no stream can be
 * created for the requested core type (for example stream_nums is 0 or the platform has no such cores).
 * New entries are appended to _streams_info_table; existing entries are not removed.
 *
 * @param stream_nums Number of streams specified by user; it is capped by the number of processors available
 * for the requested core type
 * @param threads_per_stream Number of threads per stream specified by user
 * @param core_type Cpu type (Big/Little/Any) specified by user
 * @param cpu_pinning Whether to bind the threads to cpu cores
 */
void update_executor_config(int stream_nums,
int threads_per_stream,
PreferredCoreType core_type,
bool cpu_pinning);
};

/**
Expand Down
102 changes: 102 additions & 0 deletions src/inference/src/dev/threading/istreams_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,5 +553,107 @@ IStreamsExecutor::Config IStreamsExecutor::Config::reserve_cpu_threads(const ISt
return config;
}

void IStreamsExecutor::Config::update_executor_config(int stream_nums,
int threads_per_stream,
IStreamsExecutor::Config::PreferredCoreType core_type,
bool cpu_pinning) {
const auto proc_type_table = ov::get_proc_type_table();

if (proc_type_table.empty()) {
return;
}

// IStreamsExecutor::Config config = initial;
const auto total_num_cores = proc_type_table[0][ALL_PROC];
const auto total_num_big_cores = proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC];
const auto total_num_little_cores = proc_type_table[0][EFFICIENT_CORE_PROC];

int num_cores = total_num_cores;
if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
num_cores = total_num_big_cores;
} else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
num_cores = total_num_little_cores;
}

int streams = std::min(stream_nums, num_cores);

if (streams == 0) {
return;
}

_streams = streams;
_threadPreferredCoreType = core_type;
_threadsPerStream = threads_per_stream;

// create stream_info_table based on core type
std::vector<int> stream_info(ov::CPU_STREAMS_TABLE_SIZE, 0);
stream_info[ov::THREADS_PER_STREAM] = _threadsPerStream;
stream_info[ov::STREAM_NUMA_NODE_ID] = 0;
stream_info[ov::STREAM_SOCKET_ID] = 0;
if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
if (proc_type_table[0][ov::MAIN_CORE_PROC] < _streams) {
stream_info[ov::NUMBER_OF_STREAMS] = proc_type_table[0][ov::MAIN_CORE_PROC];
stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC;
_streams_info_table.push_back(stream_info);
stream_info[ov::NUMBER_OF_STREAMS] = proc_type_table[0][ov::HYPER_THREADING_PROC];
stream_info[ov::PROC_TYPE] = ov::HYPER_THREADING_PROC;
_streams_info_table.push_back(stream_info);
} else {
stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC;
stream_info[ov::NUMBER_OF_STREAMS] = _streams;
_streams_info_table.push_back(stream_info);
}
} else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
stream_info[ov::PROC_TYPE] = ov::EFFICIENT_CORE_PROC;
stream_info[ov::NUMBER_OF_STREAMS] = _streams;
_streams_info_table.push_back(stream_info);
} else {
int total_streams = 0;
if (proc_type_table.size() == 1) {
for (int i = ov::MAIN_CORE_PROC; i <= ov::HYPER_THREADING_PROC; i++) {
if (proc_type_table[0][i] > 0) {
stream_info[ov::NUMBER_OF_STREAMS] =
(total_streams + proc_type_table[0][i] > _streams ? _streams - total_streams
: proc_type_table[0][i]);
stream_info[ov::PROC_TYPE] = i;
stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[0][PROC_NUMA_NODE_ID];
stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[0][PROC_SOCKET_ID];
_streams_info_table.push_back(stream_info);
total_streams += stream_info[ov::NUMBER_OF_STREAMS];
}
if (total_streams >= _streams)
break;
}
} else {
for (size_t i = 1; i < proc_type_table.size(); i++) {
for (int j = ov::MAIN_CORE_PROC; j < ov::HYPER_THREADING_PROC; j++) {
if (proc_type_table[i][j] > 0) {
stream_info[ov::NUMBER_OF_STREAMS] =
(total_streams + proc_type_table[i][j] > _streams ? _streams - total_streams
: proc_type_table[i][j]);
stream_info[ov::PROC_TYPE] = j;
stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[i][PROC_NUMA_NODE_ID];
stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[i][PROC_SOCKET_ID];
_streams_info_table.push_back(stream_info);
total_streams += stream_info[ov::NUMBER_OF_STREAMS];
}
if (total_streams >= _streams)
break;
}
if (total_streams >= _streams)
break;
}
}
}

if (cpu_pinning) {
_cpu_reservation = cpu_pinning;
auto new_config = reserve_cpu_threads(*this);
_stream_processor_ids = new_config._stream_processor_ids;
_streams = new_config._streams;
_threads = new_config._threads;
}
}

} // namespace threading
} // namespace ov
165 changes: 165 additions & 0 deletions src/inference/tests/unit/update_executor_config_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <common_test_utils/test_common.hpp>

// #include "ie_system_conf.h"
#include "openvino/runtime/threading/istreams_executor.hpp"
#include "os/cpu_map_info.hpp"

using namespace testing;
using namespace ov;
using namespace threading;

namespace {

#if defined(__linux__) || defined(_WIN32)

// One parameter set for UpdateExecutorConfigTest: the simulated CPU description, the arguments passed
// to update_executor_config() and the values the config is expected to hold afterwards.
// The field order matters: the test cases below are brace-initialized positionally.
struct UpdateExecutorConfigTestCase {
// [in] configuration that update_executor_config() is called on
ov::threading::IStreamsExecutor::Config _config;
// [in] processor type table installed into cpu_info() before the call
std::vector<std::vector<int>> _proc_type_table;
// [in] cpu mapping table installed into cpu_info() before the call
std::vector<std::vector<int>> _cpu_mapping_table;
// [in] stream_nums argument; also compared against _config._streams after the call
int _num_streams;
// [in] threads_per_stream argument; also compared against _config._threadsPerStream
int _threads_per_stream;
// [in] core_type argument; also compared against _config._threadPreferredCoreType
ov::threading::IStreamsExecutor::Config::PreferredCoreType _core_type;
// [in] cpu_pinning argument; also compared against _config._cpu_reservation
bool _cpu_pinning;
// [out] expected _config._streams_info_table
std::vector<std::vector<int>> _streams_info_table;
// [out] expected _config._stream_processor_ids
std::vector<std::vector<int>> _stream_processors;
};

// Parameterized fixture for IStreamsExecutor::Config::update_executor_config().
// The whole scenario (CPU setup, call, checks) runs in SetUp(), so the TEST_P body can stay empty.
class UpdateExecutorConfigTest : public ov::test::TestsCommon,
                                 public testing::WithParamInterface<std::tuple<UpdateExecutorConfigTestCase>> {
public:
    void SetUp() override {
        // Deliberately a copy: GetParam() is read-only and the config inside is modified below.
        auto test_data = std::get<0>(GetParam());

        // Replace the detected CPU description with the one from the test case.
        CPU& cpu = cpu_info();
        cpu._org_proc_type_table = test_data._proc_type_table;
        cpu._proc_type_table = test_data._proc_type_table;
        cpu._cpu_mapping_table = test_data._cpu_mapping_table;
        cpu._numa_nodes = 1;

        test_data._config.update_executor_config(test_data._num_streams,
                                                 test_data._threads_per_stream,
                                                 test_data._core_type,
                                                 test_data._cpu_pinning);

        // Each property is checked once (the original asserted the stream count twice).
        ASSERT_EQ(test_data._num_streams, test_data._config._streams);
        ASSERT_EQ(test_data._threads_per_stream, test_data._config._threadsPerStream);
        ASSERT_EQ(test_data._core_type, test_data._config._threadPreferredCoreType);
        ASSERT_EQ(test_data._cpu_pinning, test_data._config._cpu_reservation);
        ASSERT_EQ(test_data._streams_info_table, test_data._config._streams_info_table);
        ASSERT_EQ(test_data._stream_processors, test_data._config._stream_processor_ids);
    }
};

// Case: 4 streams requested with core type ANY on a CPU with 6 main and 6 hyper-threading processors.
// Expected: all 4 streams are placed on main cores and, without pinning, no processor ids are reserved.
UpdateExecutorConfigTestCase _update_num_streams = {
ov::threading::IStreamsExecutor::Config{"update num streams test"}, // param[in]: initial configuration
// param[in]: proc_type_table, {total processors, number of physical processors, number of Efficient processors,
// number of hyper threading processors, followed by two more columns - presumably numa node id and
// socket id (update_executor_config() reads PROC_NUMA_NODE_ID / PROC_SOCKET_ID from these rows)}
{
{12, 6, 0, 6, 0, 0},
},
// param[in]: cpu_mapping_table, {PROCESSOR_ID, NUMA_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, Used}
{
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 1, MAIN_CORE_PROC, 2, -1},
{3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 2, MAIN_CORE_PROC, 4, -1},
{5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 3, MAIN_CORE_PROC, 6, -1},
{7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 4, MAIN_CORE_PROC, 8, -1},
{9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 5, MAIN_CORE_PROC, 10, -1},
{11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1},
},
4, // param[in]: the number of streams
1, // param[in]: the number of threads per stream
ov::threading::IStreamsExecutor::Config::ANY, // param[in]: specified cpu core type
false, // param[in]: specified cpu pinning
// param[out]: streams_info_table, {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM, STREAM_NUMA_NODE_ID,
// STREAM_SOCKET_ID}
{
{4, MAIN_CORE_PROC, 1, 0, 0},
},
// param[out]: stream_processors, the list of processor ids on each stream.
{},
};

// Case: 8 streams requested with core type LITTLE on a CPU with 8 main, 8 efficient and
// 8 hyper-threading processors.
// Expected: all 8 streams are placed on efficient cores and, without pinning, no processor ids are reserved.
UpdateExecutorConfigTestCase _update_core_type = {
// param[in]: initial configuration
ov::threading::IStreamsExecutor::Config{"update core type test"},
// param[in]: proc_type_table (same column layout as in _update_num_streams)
{
{24, 8, 8, 8, 0, 0},
},
// param[in]: cpu_mapping_table, {PROCESSOR_ID, NUMA_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, Used}
{
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 1, MAIN_CORE_PROC, 2, -1}, {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 2, MAIN_CORE_PROC, 4, -1}, {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 3, MAIN_CORE_PROC, 6, -1}, {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 4, MAIN_CORE_PROC, 8, -1}, {9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 5, MAIN_CORE_PROC, 10, -1}, {11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1},
{12, 0, 0, 6, MAIN_CORE_PROC, 12, -1}, {13, 0, 0, 6, HYPER_THREADING_PROC, 13, -1},
{14, 0, 0, 7, MAIN_CORE_PROC, 14, -1}, {15, 0, 0, 7, HYPER_THREADING_PROC, 15, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 16, -1}, {17, 0, 0, 9, EFFICIENT_CORE_PROC, 17, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 18, -1}, {19, 0, 0, 11, EFFICIENT_CORE_PROC, 19, -1},
{20, 0, 0, 12, EFFICIENT_CORE_PROC, 20, -1}, {21, 0, 0, 13, EFFICIENT_CORE_PROC, 21, -1},
{22, 0, 0, 14, EFFICIENT_CORE_PROC, 22, -1}, {23, 0, 0, 15, EFFICIENT_CORE_PROC, 23, -1},
},
// param[in]: the number of streams
8,
// param[in]: the number of threads per stream
1,
// param[in]: specified cpu core type
ov::threading::IStreamsExecutor::Config::LITTLE,
// param[in]: specified cpu pinning
false,
// param[out]: streams_info_table, {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM, STREAM_NUMA_NODE_ID,
// STREAM_SOCKET_ID}
{
{8, EFFICIENT_CORE_PROC, 1, 0, 0},
},
// param[out]: stream_processors, the list of processor ids on each stream.
{},
};

// Case: 8 streams requested with core type ANY and cpu pinning enabled on a CPU with 4 main and
// 4 hyper-threading processors.
// Expected: 4 streams on main cores, 4 on hyper-threading processors, and one reserved processor id
// per stream (main-core ids first, then hyper-threading ids).
UpdateExecutorConfigTestCase _update_cpu_pinning = {
// param[in]: initial configuration
ov::threading::IStreamsExecutor::Config{"update cpu pinning test"},
// param[in]: proc_type_table (same column layout as in _update_num_streams)
{
{8, 4, 0, 4, 0, 0},
},
// param[in]: cpu_mapping_table, {PROCESSOR_ID, NUMA_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, Used}
{
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 1, MAIN_CORE_PROC, 2, -1},
{3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 2, MAIN_CORE_PROC, 4, -1},
{5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 3, MAIN_CORE_PROC, 6, -1},
{7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
},
// param[in]: the number of streams
8,
// param[in]: the number of threads per stream
1,
// param[in]: specified cpu core type
ov::threading::IStreamsExecutor::Config::ANY,
// param[in]: specified cpu pinning
true,
// param[out]: streams_info_table, {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM, STREAM_NUMA_NODE_ID,
// STREAM_SOCKET_ID}
{
{4, MAIN_CORE_PROC, 1, 0, 0},
{4, HYPER_THREADING_PROC, 1, 0, 0},
},
// param[out]: stream_processors, the list of processor ids on each stream.
{
{0},
{2},
{4},
{6},
{1},
{3},
{5},
{7},
},
};

// The body is intentionally empty: UpdateExecutorConfigTest::SetUp() performs the call and all assertions.
TEST_P(UpdateExecutorConfigTest, UpdateExecutorConfig) {}

// Runs the fixture once for each of the three test cases defined above.
INSTANTIATE_TEST_SUITE_P(smoke_UpdateExecutorConfig,
UpdateExecutorConfigTest,
testing::Values(_update_num_streams, _update_core_type, _update_cpu_pinning));
#endif
} // namespace
26 changes: 5 additions & 21 deletions src/plugins/intel_gpu/src/graph/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//

#include "openvino/runtime/system_conf.hpp"
#include "openvino/runtime/threading/cpu_streams_info.hpp"

#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
Expand Down Expand Up @@ -104,26 +105,6 @@
using namespace cldnn;
using namespace ov::intel_gpu;

// Caps config._streams by the number of logical cores of the preferred core type.
// Nothing is changed when the platform reports exactly one core type.
static void adjust_num_cores(ov::threading::IStreamsExecutor::Config& config) {
    using StreamsConfig = ov::threading::IStreamsExecutor::Config;

    if (ov::get_available_cores_types().size() != 1) {
        const auto logical_cores = ov::get_number_of_logical_cpu_cores();
        const auto big_cores = ov::get_number_of_logical_cpu_cores(true);

        // Any preference other than BIG / LITTLE is limited by all logical cores.
        int limit = logical_cores;
        switch (config._threadPreferredCoreType) {
        case StreamsConfig::BIG:
            limit = big_cores;
            break;
        case StreamsConfig::LITTLE:
            // Little cores are whatever remains once the big cores are taken out.
            limit = logical_cores - big_cores;
            break;
        default:
            break;
        }

        if (limit < config._streams) {
            config._streams = limit;
        }
    }
}

static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) {
ov::threading::IStreamsExecutor::Config task_executor_config(tags, 1);
task_executor_config._streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads);
Expand All @@ -135,7 +116,10 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E
default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority);
}

adjust_num_cores(task_executor_config);
task_executor_config.update_executor_config(task_executor_config._streams,
1,
task_executor_config._threadPreferredCoreType,
false);

return task_executor_config;
}
Expand Down
6 changes: 4 additions & 2 deletions src/plugins/intel_gpu/src/plugin/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ CompiledModel::CompiledModel(std::shared_ptr<ov::Model> model,
: ov::ICompiledModel(model,
plugin,
wrap_if_old_api(context, plugin->is_new_api()),
create_task_executor(plugin, config))
create_task_executor(plugin, config),
nullptr)
, m_context(context)
, m_config(config)
, m_wait_executor(std::make_shared<ov::threading::CPUStreamsExecutor>(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"}))
Expand All @@ -86,7 +87,8 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer ib,
: ov::ICompiledModel(nullptr,
plugin,
wrap_if_old_api(context, plugin->is_new_api()),
create_task_executor(plugin, config))
create_task_executor(plugin, config),
nullptr)
, m_context(context)
, m_config(config)
, m_wait_executor(std::make_shared<ov::threading::CPUStreamsExecutor>(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"}))
Expand Down

0 comments on commit 82f191b

Please sign in to comment.