Skip to content

Commit

Permalink
[GPU] Apply #27407 and #27553 PRs to 2024.5 release (#27665)
Browse files Browse the repository at this point in the history
### Details:
- This PR includes an improvement and a fix to the weightless cache feature added in
2024.5.
 - The changes combine the code from #27407 and #27553.

### Tickets:
 - https://jira.devtools.intel.com/browse/CVS-157364
  • Loading branch information
tkrupa-intel authored Dec 5, 2024
1 parent f6c3cec commit f5db909
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 6 deletions.
12 changes: 12 additions & 0 deletions src/inference/src/dev/core_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,18 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(

ov::AnyMap update_config = config;
update_config[ov::loaded_from_cache.name()] = true;

// Only forward a weights path to plugins that list ov::weights_path among
// their supported properties.
if (util::contains(plugin.get_property(ov::supported_properties), ov::weights_path)) {
// Start from the model path recorded in the cache entry.
std::string weights_path = cacheContent.modelPath;
auto pos = weights_path.rfind('.');
// A trailing ".xml" extension is swapped for ".bin"; any other path is
// left unchanged.
if (pos != weights_path.npos && weights_path.substr(pos) == ".xml") {
weights_path = weights_path.substr(0, pos);
weights_path += ".bin";
}
// The property is set only when the resulting file exists; otherwise the
// import config is left without a weights path.
// NOTE(review): for a non-".xml" model path this passes the model file
// itself as the weights path -- confirm that is intended.
if (ov::util::file_exists(weights_path)) {
update_config[ov::weights_path.name()] = weights_path;
}
}
compiled_model = context ? plugin.import_model(networkStream, context, update_config)
: plugin.import_model(networkStream, update_config);
});
Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/plugin/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,8 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RW},
ov::PropertyName{ov::device::id.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW}
ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW},
};

return supported_properties;
Expand Down
31 changes: 26 additions & 5 deletions src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,22 @@
#include "openvino/pass/serialize.hpp"

namespace {
class CheckWeightlessCacheAccuracy : public ::testing::Test {
class CheckWeightlessCacheAccuracy : public ::testing::Test,
public ::testing::WithParamInterface<bool> {
public:
/// Produces the name suffix for one parameterized instance of this fixture.
/// The bool parameter is streamed without std::boolalpha, so the suffix ends
/// in "1" or "0".
static std::string get_test_case_name(::testing::TestParamInfo<bool> obj) {
    std::ostringstream name_builder;
    name_builder << "use_compile_model_api=";
    name_builder << obj.param;
    return name_builder.str();
}
protected:
std::shared_ptr<ov::Model> model;
std::string xml_path;
std::string bin_path;
std::string cache_path;
bool use_compile_model_api; // for loading from cache

void SetUp() override;
void TearDown() override;
Expand All @@ -51,6 +61,7 @@ void CheckWeightlessCacheAccuracy::SetUp() {
xml_path = filePrefix + ".xml";
bin_path = filePrefix + ".bin";
cache_path = filePrefix + ".blob";
use_compile_model_api = GetParam();
}

void CheckWeightlessCacheAccuracy::TearDown() {
Expand All @@ -74,7 +85,13 @@ void CheckWeightlessCacheAccuracy::run() {

auto ifstr = std::ifstream(cache_path, std::ifstream::binary);
ov::CompiledModel imported_model;
OV_ASSERT_NO_THROW(imported_model = core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path));
// Obtain the second compiled model through the API selected by the test
// parameter; either call must complete without throwing.
if (use_compile_model_api) {
// compile_model() on the XML path with the plain config.
// NOTE(review): presumably this hits the model cache populated earlier in
// run() -- confirm against the config set up above.
OV_ASSERT_NO_THROW(imported_model =
core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config));
} else {
// import_model() reads the blob from the already opened stream and uses
// the config variant that carries the weights path.
OV_ASSERT_NO_THROW(imported_model =
core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path));
}
ifstr.close();

auto orig_req = compiled_model.create_infer_request();
Expand All @@ -98,19 +115,23 @@ void CheckWeightlessCacheAccuracy::run() {
}
}

TEST_F(CheckWeightlessCacheAccuracy, ReadConcatSplitAssign) {
TEST_P(CheckWeightlessCacheAccuracy, ReadConcatSplitAssign) {
model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, ov::element::f16);
run();
}

TEST_F(CheckWeightlessCacheAccuracy, SingleConcatWithConstant) {
TEST_P(CheckWeightlessCacheAccuracy, SingleConcatWithConstant) {
model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, ov::element::f16);
run();
}

TEST_F(CheckWeightlessCacheAccuracy, TiWithLstmCell) {
TEST_P(CheckWeightlessCacheAccuracy, TiWithLstmCell) {
model = ov::test::utils::make_ti_with_lstm_cell(ov::element::f16);
run();
}

// Instantiate every CheckWeightlessCacheAccuracy test for both values of the
// bool parameter (::testing::Bool() yields false and true); the parameter
// selects compile_model vs import_model when loading from cache. Instance
// names are generated by get_test_case_name.
INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, CheckWeightlessCacheAccuracy,
::testing::Bool(),
CheckWeightlessCacheAccuracy::get_test_case_name);

} // namespace

0 comments on commit f5db909

Please sign in to comment.