Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Enable encryption of cache blob with CacheMode::OPTIMIZE_SIZE #27912

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,8 @@ struct EncryptionCallbacks {
* when loading from the cache. This property is set in core.compile_model only.
* - First value of the struct is encryption function.
* - Second value of the struct is decryption function.
* @note GPU Plugin: encrypts whole blob, not only model structure. Only used when ov::cache_mode property is set to
* "OPTIMIZE_SIZE".
* @ingroup ov_runtime_cpp_prop_api
*/
static constexpr Property<EncryptionCallbacks, PropertyMutability::WO> cache_encryption_callbacks{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@ class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {
BinaryOutputBuffer(std::ostream& stream)
: OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {}

void write(void const * data, std::streamsize size) {
virtual ~BinaryOutputBuffer() = default;

virtual void write(void const* data, std::streamsize size) {
auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
OPENVINO_ASSERT(written_size == size,
"[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
"[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " +
std::to_string(written_size));
}

virtual void flush() {}

void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernelImplParams() const { return _impl_params; }
void set_stream(void* strm) { _strm = strm; }
Expand All @@ -42,7 +47,9 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
BinaryInputBuffer(std::istream& stream, engine& engine)
: InputBuffer<BinaryInputBuffer>(this, engine), _stream(stream), _impl_params(nullptr) {}

void read(void* const data, std::streamsize size) {
virtual ~BinaryInputBuffer() = default;

virtual void read(void* const data, std::streamsize size) {
auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
OPENVINO_ASSERT(read_size == size,
"[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
Expand All @@ -51,14 +58,73 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernelImplParams() const { return _impl_params; }

std::streampos tellg() { return _stream.tellg(); }
void seekg(std::streampos pos) { _stream.seekg(pos); }

private:
std::istream& _stream;
void* _impl_params;
};

class EncryptedBinaryOutputBuffer : public BinaryOutputBuffer {
public:
    EncryptedBinaryOutputBuffer(std::ostream& stream, std::function<std::string(const std::string&)> encrypt)
        : BinaryOutputBuffer(stream),
          encrypt(encrypt) {
        OPENVINO_ASSERT(encrypt);
    }

    ~EncryptedBinaryOutputBuffer() override = default;

    /// Buffers all writes in memory; nothing reaches the underlying stream until flush().
    void write(void const* data, std::streamsize size) override {
        plaintext_str.append(reinterpret_cast<const char*>(data), size);
    }

    /// Encrypts everything buffered so far and emits it to the wrapped stream as
    /// a raw size_t length prefix followed by the ciphertext bytes.
    void flush() override {
        const std::string ciphertext = encrypt(plaintext_str);
        size_t ciphertext_len = ciphertext.size();
        BinaryOutputBuffer::write(make_data(&ciphertext_len, sizeof(ciphertext_len)).data, sizeof(ciphertext_len));
        BinaryOutputBuffer::write(make_data(ciphertext.c_str(), ciphertext.size()).data, ciphertext.size());
    }

private:
    // Accumulated plaintext. Kept as a std::string (not a stringstream) so it can be
    // passed to encrypt() directly without producing an additional copy.
    std::string plaintext_str;
    std::function<std::string(const std::string&)> encrypt;
};

class EncryptedBinaryInputBuffer : public BinaryInputBuffer {
public:
    /// Reads the size-prefixed encrypted payload produced by EncryptedBinaryOutputBuffer,
    /// decrypts it eagerly in the constructor, and serves all subsequent read() calls
    /// from the decrypted in-memory stream.
    EncryptedBinaryInputBuffer(std::istream& stream,
                               engine& engine,
                               std::function<std::string(const std::string&)> decrypt)
        : BinaryInputBuffer(stream, engine),
          decrypt(decrypt) {
        OPENVINO_ASSERT(decrypt);

        // Payload length is stored as a raw size_t prefix by EncryptedBinaryOutputBuffer::flush().
        size_t bytes = 0;
        BinaryInputBuffer::read(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));

        // Not reading directly to plaintext_stream because decrypt(plaintext_stream.str()) would create an additional
        // copy.
        std::string str(bytes, 0);
        // &str[0] is a valid writable pointer since C++11 (also for an empty string when
        // bytes == 0). The previous const_cast of c_str() wrote through a pointer the
        // standard only grants read access to, which is undefined behavior.
        BinaryInputBuffer::read(make_data(&str[0], str.size()).data, str.size());
        plaintext_stream.str(decrypt(str));
    }

    ~EncryptedBinaryInputBuffer() override = default;

    /// Serves reads from the decrypted buffer instead of the raw (encrypted) stream.
    void read(void* const data, std::streamsize size) override {
        auto const read_size = plaintext_stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
        OPENVINO_ASSERT(
            read_size == size,
            "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
    }

private:
    std::stringstream plaintext_stream;
    std::function<std::string(const std::string&)> decrypt;
};

template <typename T>
class Serializer<BinaryOutputBuffer, T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
public:
Expand Down
12 changes: 11 additions & 1 deletion src/plugins/intel_gpu/src/plugin/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,16 @@ void CompiledModel::export_model(std::ostream& model) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");

cldnn::BinaryOutputBuffer ob(model);
const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks);

// Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty.
const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
std::unique_ptr<cldnn::BinaryOutputBuffer> ob_ptr =
encryption_enabled
? cldnn::make_unique<cldnn::EncryptedBinaryOutputBuffer>(model, encryption_callbacks.encrypt)
: cldnn::make_unique<cldnn::BinaryOutputBuffer>(model);
auto& ob = *ob_ptr;

ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));

// Inputs
Expand Down Expand Up @@ -222,6 +231,7 @@ void CompiledModel::export_model(std::ostream& model) const {
}

get_graph(0)->export_model(ob);
ob.flush();
}

std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
Expand Down
18 changes: 14 additions & 4 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,12 +339,21 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,
config.set_user_property(_orig_config);
config.apply_user_properties(context_impl->get_engine().get_device_info());

cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());
ov::CacheMode cache_mode = config.get_property(ov::cache_mode);
ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks);
const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;

ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
std::unique_ptr<cldnn::BinaryInputBuffer> ib_ptr =
encryption_enabled ? cldnn::make_unique<cldnn::EncryptedBinaryInputBuffer>(model,
context_impl->get_engine(),
encryption_callbacks.decrypt)
: cldnn::make_unique<cldnn::BinaryInputBuffer>(model, context_impl->get_engine());
auto& ib = *ib_ptr;

if (cache_mode != config.get_property(ov::cache_mode)) {
ov::CacheMode loaded_cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
ib >> cldnn::make_data(&loaded_cache_mode, sizeof(ov::CacheMode));

if (loaded_cache_mode != cache_mode) {
return nullptr;
}

Expand Down Expand Up @@ -608,6 +617,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW},
ov::PropertyName{ov::cache_encryption_callbacks.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::kv_cache_precision.name(), PropertyMutability::RW},
};

Expand Down
19 changes: 16 additions & 3 deletions src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,24 @@
#include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
#include "common_test_utils/test_common.hpp"
#include "openvino/pass/serialize.hpp"
#include "openvino/util/codec_xor.hpp"

namespace {
typedef std::tuple<bool, ov::element::Type, ov::element::Type> testParams;
typedef std::tuple<bool, bool, ov::element::Type, ov::element::Type> testParams;

class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface<testParams> {
public:
static std::string get_test_case_name(::testing::TestParamInfo<testParams> obj) {
bool use_compile_model_api_;
bool do_encryption_;
ov::element::Type inference_mode_;
ov::element::Type model_dtype_;
std::tie(use_compile_model_api_, inference_mode_, model_dtype_) = obj.param;
std::tie(use_compile_model_api_, do_encryption_, inference_mode_, model_dtype_) = obj.param;

std::ostringstream result;
const char separator = '_';
result << "use_compile_model_api=" << use_compile_model_api_ << separator;
result << "_do_encryption=" << do_encryption_;
result << "inference_mode=" << inference_mode_ << separator;
result << "model_dtype=" << model_dtype_;
return result.str();
Expand All @@ -40,6 +43,7 @@ class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::W
std::string cache_path;
std::string cache_dir;
bool use_compile_model_api; // for loading from cache
bool do_encryption;
ov::element::Type inference_mode;
ov::element::Type model_dtype;

Expand All @@ -55,7 +59,7 @@ void CheckWeightlessCacheAccuracy::SetUp() {
cache_path = filePrefix + ".blob";
cache_dir = filePrefix + "_cache_dir";

std::tie(use_compile_model_api, inference_mode, model_dtype) = GetParam();
std::tie(use_compile_model_api, do_encryption, inference_mode, model_dtype) = GetParam();
}

void CheckWeightlessCacheAccuracy::TearDown() {
Expand All @@ -75,6 +79,14 @@ void CheckWeightlessCacheAccuracy::run() {
ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
ov::weights_path(bin_path),
ov::hint::inference_precision(inference_mode)};

if (do_encryption) {
ov::EncryptionCallbacks encryption_callbacks;
encryption_callbacks.encrypt = ov::util::codec_xor;
encryption_callbacks.decrypt = ov::util::codec_xor;
config.insert(ov::cache_encryption_callbacks(encryption_callbacks));
config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks));
}
auto core = ov::test::utils::PluginCache::get().core();
ov::pass::Serialize(xml_path, bin_path).run_on_model(model);

Expand Down Expand Up @@ -150,6 +162,7 @@ const std::vector<ov::element::Type> model_dtypes = {
INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy,
CheckWeightlessCacheAccuracy,
::testing::Combine(::testing::Bool(),
::testing::Bool(),
::testing::ValuesIn(inference_modes),
::testing::ValuesIn(model_dtypes)),
CheckWeightlessCacheAccuracy::get_test_case_name);
Expand Down
Loading