[GPU] Enable encryption of cache blob with CacheMode::OPTIMIZE_SIZE (#27912)

### Details:
- Enables encryption of the cache blob with CacheMode::OPTIMIZE_SIZE in the GPU Plugin (a usage sketch follows the list).
- Some test coverage is already present in
src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp.
The coverage added in this PR is distinct from those tests because it also
checks correctness of the inference results.
- #27742 has to be merged first: it guarantees small cache sizes in the
majority of cases, which is important for encryption.
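
A minimal usage sketch, under assumptions: a `model.xml` on disk, and identity lambdas standing in for a real cipher (`ov::cache_encryption_callbacks`, `ov::cache_mode`, and `ov::cache_dir` are the actual properties):

```cpp
#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("cache_dir"));  // enable model caching

    // Both callbacks take and return std::string. With the GPU plugin they are
    // applied to the whole blob, and only when cache_mode is OPTIMIZE_SIZE.
    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = [](const std::string& s) { return s; };  // stand-in: use a real cipher
    callbacks.decrypt = [](const std::string& s) { return s; };

    // The property is set in compile_model only; the same call transparently
    // imports from the cache (and decrypts) on subsequent runs.
    auto compiled = core.compile_model("model.xml",
                                       "GPU",
                                       ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                       ov::cache_encryption_callbacks(callbacks));
    return 0;
}
```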

### Tickets:
 - CVS-158140

---------

Co-authored-by: Sergey Shlyapnikov <[email protected]>
tkrupa-intel and sshlyapn authored Dec 13, 2024
1 parent bd9a6d1 commit e3606a5
Showing 5 changed files with 115 additions and 14 deletions.
2 changes: 2 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
```diff
@@ -801,6 +801,8 @@ struct EncryptionCallbacks {
  * when loading from the cache. This property is set in core.compile_model only.
  * - First value of the struct is encryption function.
  * - Second value of the struct is decryption function.
+ * @note GPU Plugin: encrypts whole blob, not only model structure. Only used when ov::cache_mode property is set to
+ * "OPTIMIZE_SIZE".
  * @ingroup ov_runtime_cpp_prop_api
  */
 static constexpr Property<EncryptionCallbacks, PropertyMutability::WO> cache_encryption_callbacks{
```
```diff
@@ -20,12 +20,17 @@ class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {
     BinaryOutputBuffer(std::ostream& stream)
         : OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {}
 
-    void write(void const * data, std::streamsize size) {
+    virtual ~BinaryOutputBuffer() = default;
+
+    virtual void write(void const* data, std::streamsize size) {
         auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
         OPENVINO_ASSERT(written_size == size,
-            "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
+                        "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " +
+                            std::to_string(written_size));
     }
 
+    virtual void flush() {}
+
     void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
     void* getKernelImplParams() const { return _impl_params; }
     void set_stream(void* strm) { _strm = strm; }
@@ -42,7 +47,9 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
     BinaryInputBuffer(std::istream& stream, engine& engine)
         : InputBuffer<BinaryInputBuffer>(this, engine), _stream(stream), _impl_params(nullptr) {}
 
-    void read(void* const data, std::streamsize size) {
+    virtual ~BinaryInputBuffer() = default;
+
+    virtual void read(void* const data, std::streamsize size) {
         auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
         OPENVINO_ASSERT(read_size == size,
             "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
@@ -51,14 +58,73 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
     void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
     void* getKernelImplParams() const { return _impl_params; }
 
+    std::streampos tellg() { return _stream.tellg(); }
+    void seekg(std::streampos pos) { _stream.seekg(pos); }
+
 private:
     std::istream& _stream;
     void* _impl_params;
 };
 
+class EncryptedBinaryOutputBuffer : public BinaryOutputBuffer {
+public:
+    EncryptedBinaryOutputBuffer(std::ostream& stream, std::function<std::string(const std::string&)> encrypt)
+        : BinaryOutputBuffer(stream),
+          encrypt(encrypt) {
+        OPENVINO_ASSERT(encrypt);
+    }
+
+    ~EncryptedBinaryOutputBuffer() override = default;
+
+    void write(void const* data, std::streamsize size) override {
+        plaintext_str.append(reinterpret_cast<const char*>(data), size);
+    }
+
+    void flush() override {
+        auto encrypted_str = encrypt(plaintext_str);
+        size_t bytes = encrypted_str.size();
+        BinaryOutputBuffer::write(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));
+        BinaryOutputBuffer::write(make_data(encrypted_str.c_str(), encrypted_str.size()).data, encrypted_str.size());
+    }
+
+private:
+    std::string
+        plaintext_str;  // Not using stringstream here because passing to encrypt() would produce an additional copy.
+    std::function<std::string(const std::string&)> encrypt;
+};
+
+class EncryptedBinaryInputBuffer : public BinaryInputBuffer {
+public:
+    EncryptedBinaryInputBuffer(std::istream& stream,
+                               engine& engine,
+                               std::function<std::string(const std::string&)> decrypt)
+        : BinaryInputBuffer(stream, engine),
+          decrypt(decrypt) {
+        OPENVINO_ASSERT(decrypt);
+
+        size_t bytes;
+        BinaryInputBuffer::read(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));
+
+        // Not reading directly to plaintext_stream because decrypt(plaintext_stream.str()) would create an additional
+        // copy.
+        std::string str(bytes, 0);
+        BinaryInputBuffer::read(
+            make_data(const_cast<void*>(reinterpret_cast<const void*>(str.c_str())), str.size()).data,
+            str.size());
+        plaintext_stream.str(decrypt(str));
+    }
+
+    ~EncryptedBinaryInputBuffer() override = default;
+
+    void read(void* const data, std::streamsize size) override {
+        auto const read_size = plaintext_stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
+        OPENVINO_ASSERT(
+            read_size == size,
+            "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
+    }
+
+private:
+    std::stringstream plaintext_stream;
+    std::function<std::string(const std::string&)> decrypt;
+};
+
 template <typename T>
 class Serializer<BinaryOutputBuffer, T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
 public:
```
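The on-disk framing used by the encrypted buffers above is a size prefix followed by the ciphertext: `flush()` writes the byte count of the encrypted payload and then the payload itself, and `EncryptedBinaryInputBuffer`'s constructor reads them back in the same order before serving `read()` calls from the decrypted plaintext. A self-contained sketch of that framing, using plain standard-library stream I/O and a toy XOR codec instead of the plugin's `make_data`/`sputn` helpers (all names here are illustrative, not the plugin's API):

```cpp
#include <cassert>
#include <cstddef>
#include <sstream>
#include <string>

// Toy symmetric codec: XOR with a fixed key, so encode == decode.
static std::string xor_codec(const std::string& in) {
    std::string out = in;
    for (auto& c : out)
        c ^= 0x5A;
    return out;
}

int main() {
    // "flush" side: size prefix, then ciphertext.
    std::stringstream blob;
    const std::string plaintext = "serialized graph bytes";
    const std::string ciphertext = xor_codec(plaintext);
    size_t bytes = ciphertext.size();
    blob.write(reinterpret_cast<const char*>(&bytes), sizeof(bytes));
    blob.write(ciphertext.data(), static_cast<std::streamsize>(ciphertext.size()));

    // "constructor" side: read size, read payload, decrypt once.
    size_t read_bytes = 0;
    blob.read(reinterpret_cast<char*>(&read_bytes), sizeof(read_bytes));
    std::string payload(read_bytes, '\0');
    blob.read(&payload[0], static_cast<std::streamsize>(read_bytes));
    assert(xor_codec(payload) == plaintext);
    return 0;
}
```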
12 changes: 11 additions & 1 deletion src/plugins/intel_gpu/src/plugin/compiled_model.cpp
```diff
@@ -179,7 +179,16 @@ void CompiledModel::export_model(std::ostream& model) const {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
     OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");
 
-    cldnn::BinaryOutputBuffer ob(model);
+    const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks);
+
+    // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty.
+    const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
+    std::unique_ptr<cldnn::BinaryOutputBuffer> ob_ptr =
+        encryption_enabled
+            ? cldnn::make_unique<cldnn::EncryptedBinaryOutputBuffer>(model, encryption_callbacks.encrypt)
+            : cldnn::make_unique<cldnn::BinaryOutputBuffer>(model);
+    auto& ob = *ob_ptr;
 
     ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
 
     // Inputs
@@ -222,6 +231,7 @@ void CompiledModel::export_model(std::ostream& model) const {
     }
 
     get_graph(0)->export_model(ob);
+    ob.flush();
 }
 
 std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
```
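Note that with encryption enabled the cache-mode marker written through `ob` above lands in the plaintext buffer and is only emitted, size-prefixed and encrypted, by the final `ob.flush()`; the import path below therefore constructs the matching decrypting buffer before it can read that marker back.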
18 changes: 14 additions & 4 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
```diff
@@ -339,12 +339,21 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,
     config.set_user_property(_orig_config);
     config.apply_user_properties(context_impl->get_engine().get_device_info());
 
-    cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());
+    ov::CacheMode cache_mode = config.get_property(ov::cache_mode);
+    ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks);
+    const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
 
-    ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
-    ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
+    std::unique_ptr<cldnn::BinaryInputBuffer> ib_ptr =
+        encryption_enabled ? cldnn::make_unique<cldnn::EncryptedBinaryInputBuffer>(model,
+                                                                                   context_impl->get_engine(),
+                                                                                   encryption_callbacks.decrypt)
+                           : cldnn::make_unique<cldnn::BinaryInputBuffer>(model, context_impl->get_engine());
+    auto& ib = *ib_ptr;
 
-    if (cache_mode != config.get_property(ov::cache_mode)) {
+    ov::CacheMode loaded_cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
+    ib >> cldnn::make_data(&loaded_cache_mode, sizeof(ov::CacheMode));
+
+    if (loaded_cache_mode != cache_mode) {
         return nullptr;
     }
 
@@ -608,6 +617,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
         ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
         ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW},
+        ov::PropertyName{ov::cache_encryption_callbacks.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::kv_cache_precision.name(), PropertyMutability::RW},
     };
 
```
19 changes: 16 additions & 3 deletions src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
```diff
@@ -13,21 +13,24 @@
 #include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
 #include "common_test_utils/test_common.hpp"
 #include "openvino/pass/serialize.hpp"
+#include "openvino/util/codec_xor.hpp"
 
 namespace {
-typedef std::tuple<bool, ov::element::Type, ov::element::Type> testParams;
+typedef std::tuple<bool, bool, ov::element::Type, ov::element::Type> testParams;
 
 class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface<testParams> {
 public:
     static std::string get_test_case_name(::testing::TestParamInfo<testParams> obj) {
         bool use_compile_model_api_;
+        bool do_encryption_;
         ov::element::Type inference_mode_;
         ov::element::Type model_dtype_;
-        std::tie(use_compile_model_api_, inference_mode_, model_dtype_) = obj.param;
+        std::tie(use_compile_model_api_, do_encryption_, inference_mode_, model_dtype_) = obj.param;
 
         std::ostringstream result;
         const char separator = '_';
         result << "use_compile_model_api=" << use_compile_model_api_ << separator;
+        result << "_do_encryption=" << do_encryption_;
         result << "inference_mode=" << inference_mode_ << separator;
         result << "model_dtype=" << model_dtype_;
         return result.str();
@@ -40,6 +43,7 @@ class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::W
     std::string cache_path;
     std::string cache_dir;
     bool use_compile_model_api;  // for loading from cache
+    bool do_encryption;
     ov::element::Type inference_mode;
     ov::element::Type model_dtype;
 
@@ -55,7 +59,7 @@ void CheckWeightlessCacheAccuracy::SetUp() {
     cache_path = filePrefix + ".blob";
     cache_dir = filePrefix + "_cache_dir";
 
-    std::tie(use_compile_model_api, inference_mode, model_dtype) = GetParam();
+    std::tie(use_compile_model_api, do_encryption, inference_mode, model_dtype) = GetParam();
 }
 
 void CheckWeightlessCacheAccuracy::TearDown() {
@@ -75,6 +79,14 @@ void CheckWeightlessCacheAccuracy::run() {
     ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                            ov::weights_path(bin_path),
                                            ov::hint::inference_precision(inference_mode)};
+
+    if (do_encryption) {
+        ov::EncryptionCallbacks encryption_callbacks;
+        encryption_callbacks.encrypt = ov::util::codec_xor;
+        encryption_callbacks.decrypt = ov::util::codec_xor;
+        config.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+        config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+    }
     auto core = ov::test::utils::PluginCache::get().core();
     ov::pass::Serialize(xml_path, bin_path).run_on_model(model);
 
@@ -150,6 +162,7 @@ const std::vector<ov::element::Type> model_dtypes = {
 INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy,
                          CheckWeightlessCacheAccuracy,
                          ::testing::Combine(::testing::Bool(),
+                                            ::testing::Bool(),
                                             ::testing::ValuesIn(inference_modes),
                                             ::testing::ValuesIn(model_dtypes)),
                          CheckWeightlessCacheAccuracy::get_test_case_name);
```
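The test can assign `ov::util::codec_xor` to both callbacks because XOR with a fixed key is an involution: applying the codec twice restores the original bytes, so one function serves as both encryptor and decryptor. A minimal sketch of that property, assuming only the header and the `std::string(const std::string&)` signature used by the test above:

```cpp
#include <cassert>
#include <string>

#include "openvino/util/codec_xor.hpp"

int main() {
    const std::string blob = "serialized cache blob bytes";
    // Encoding twice with the same XOR codec is the identity,
    // so the encrypt/decrypt round trip restores the input.
    assert(ov::util::codec_xor(ov::util::codec_xor(blob)) == blob);
    return 0;
}
```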
