[GPU] Enable encryption of cache blob with CacheMode::OPTIMIZE_SIZE (#27912)

### Details:
- Enables encryption of the cache blob with CacheMode::OPTIMIZE_SIZE in the GPU Plugin (a usage sketch follows the list).
- Some test coverage is already present in
src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp.
The coverage added in this PR is distinct from those tests because it also
checks correctness of the inference results.
- #27742 has to be merged first: it guarantees small cache sizes in the
majority of cases, which is important for encryption.
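
A minimal usage sketch, under assumptions: a `model.xml` on disk, and identity lambdas standing in for a real cipher (`ov::cache_encryption_callbacks`, `ov::cache_mode`, and `ov::cache_dir` are the actual properties):

```cpp
#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("cache_dir"));  // enable model caching

    // Both callbacks take and return std::string. With the GPU plugin they are
    // applied to the whole blob, and only when cache_mode is OPTIMIZE_SIZE.
    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = [](const std::string& s) { return s; };  // stand-in: use a real cipher
    callbacks.decrypt = [](const std::string& s) { return s; };

    // The property is set in compile_model only; the same call transparently
    // imports from the cache (and decrypts) on subsequent runs.
    auto compiled = core.compile_model("model.xml",
                                       "GPU",
                                       ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                       ov::cache_encryption_callbacks(callbacks));
    return 0;
}
```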

### Tickets:
 - CVS-158140

---------

Co-authored-by: Sergey Shlyapnikov <[email protected]>
tkrupa-intel and sshlyapn authored Dec 13, 2024
1 parent bd9a6d1 commit e3606a5
Showing 5 changed files with 115 additions and 14 deletions.
2 changes: 2 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
```diff
@@ -801,6 +801,8 @@ struct EncryptionCallbacks {
  * when loading from the cache. This property is set in core.compile_model only.
  * - First value of the struct is encryption function.
  * - Second value of the struct is decryption function.
+ * @note GPU Plugin: encrypts whole blob, not only model structure. Only used when ov::cache_mode property is set to
+ * "OPTIMIZE_SIZE".
  * @ingroup ov_runtime_cpp_prop_api
  */
 static constexpr Property<EncryptionCallbacks, PropertyMutability::WO> cache_encryption_callbacks{
```
```diff
@@ -20,12 +20,17 @@ class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {
     BinaryOutputBuffer(std::ostream& stream)
         : OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {}
 
-    void write(void const * data, std::streamsize size) {
+    virtual ~BinaryOutputBuffer() = default;
+
+    virtual void write(void const* data, std::streamsize size) {
         auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
         OPENVINO_ASSERT(written_size == size,
-            "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
+                        "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " +
+                            std::to_string(written_size));
     }
 
+    virtual void flush() {}
+
     void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
     void* getKernelImplParams() const { return _impl_params; }
     void set_stream(void* strm) { _strm = strm; }
@@ -42,7 +47,9 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
     BinaryInputBuffer(std::istream& stream, engine& engine)
         : InputBuffer<BinaryInputBuffer>(this, engine), _stream(stream), _impl_params(nullptr) {}
 
-    void read(void* const data, std::streamsize size) {
+    virtual ~BinaryInputBuffer() = default;
+
+    virtual void read(void* const data, std::streamsize size) {
         auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
         OPENVINO_ASSERT(read_size == size,
             "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
@@ -51,14 +58,73 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
     void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
     void* getKernelImplParams() const { return _impl_params; }
 
+    std::streampos tellg() { return _stream.tellg(); }
+    void seekg(std::streampos pos) { _stream.seekg(pos); }
+
 private:
     std::istream& _stream;
     void* _impl_params;
 };
 
+class EncryptedBinaryOutputBuffer : public BinaryOutputBuffer {
+public:
+    EncryptedBinaryOutputBuffer(std::ostream& stream, std::function<std::string(const std::string&)> encrypt)
+        : BinaryOutputBuffer(stream),
+          encrypt(encrypt) {
+        OPENVINO_ASSERT(encrypt);
+    }
+
+    ~EncryptedBinaryOutputBuffer() override = default;
+
+    void write(void const* data, std::streamsize size) override {
+        plaintext_str.append(reinterpret_cast<const char*>(data), size);
+    }
+
+    void flush() override {
+        auto encrypted_str = encrypt(plaintext_str);
+        size_t bytes = encrypted_str.size();
+        BinaryOutputBuffer::write(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));
+        BinaryOutputBuffer::write(make_data(encrypted_str.c_str(), encrypted_str.size()).data, encrypted_str.size());
+    }
+
+private:
+    std::string
+        plaintext_str;  // Not using stringstream here because passing to encrypt() would produce an additional copy.
+    std::function<std::string(const std::string&)> encrypt;
+};
+
+class EncryptedBinaryInputBuffer : public BinaryInputBuffer {
+public:
+    EncryptedBinaryInputBuffer(std::istream& stream,
+                               engine& engine,
+                               std::function<std::string(const std::string&)> decrypt)
+        : BinaryInputBuffer(stream, engine),
+          decrypt(decrypt) {
+        OPENVINO_ASSERT(decrypt);
+
+        size_t bytes;
+        BinaryInputBuffer::read(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));
+
+        // Not reading directly to plaintext_stream because decrypt(plaintext_stream.str()) would create an additional
+        // copy.
+        std::string str(bytes, 0);
+        BinaryInputBuffer::read(
+            make_data(const_cast<void*>(reinterpret_cast<const void*>(str.c_str())), str.size()).data,
+            str.size());
+        plaintext_stream.str(decrypt(str));
+    }
+
+    ~EncryptedBinaryInputBuffer() override = default;
+
+    void read(void* const data, std::streamsize size) override {
+        auto const read_size = plaintext_stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
+        OPENVINO_ASSERT(
+            read_size == size,
+            "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
+    }
+
+private:
+    std::stringstream plaintext_stream;
+    std::function<std::string(const std::string&)> decrypt;
+};
+
 template <typename T>
 class Serializer<BinaryOutputBuffer, T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
 public:
```
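The on-disk framing used by the encrypted buffers above is a size prefix followed by the ciphertext: `flush()` writes the byte count of the encrypted payload and then the payload itself, and `EncryptedBinaryInputBuffer`'s constructor reads them back in the same order before serving `read()` calls from the decrypted plaintext. A self-contained sketch of that framing, using plain standard-library stream I/O and a toy XOR codec instead of the plugin's `make_data`/`sputn` helpers (all names here are illustrative, not the plugin's API):

```cpp
#include <cassert>
#include <cstddef>
#include <sstream>
#include <string>

// Toy symmetric codec: XOR with a fixed key, so encode == decode.
static std::string xor_codec(const std::string& in) {
    std::string out = in;
    for (auto& c : out)
        c ^= 0x5A;
    return out;
}

int main() {
    // "flush" side: size prefix, then ciphertext.
    std::stringstream blob;
    const std::string plaintext = "serialized graph bytes";
    const std::string ciphertext = xor_codec(plaintext);
    size_t bytes = ciphertext.size();
    blob.write(reinterpret_cast<const char*>(&bytes), sizeof(bytes));
    blob.write(ciphertext.data(), static_cast<std::streamsize>(ciphertext.size()));

    // "constructor" side: read size, read payload, decrypt once.
    size_t read_bytes = 0;
    blob.read(reinterpret_cast<char*>(&read_bytes), sizeof(read_bytes));
    std::string payload(read_bytes, '\0');
    blob.read(&payload[0], static_cast<std::streamsize>(read_bytes));
    assert(xor_codec(payload) == plaintext);
    return 0;
}
```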
12 changes: 11 additions & 1 deletion src/plugins/intel_gpu/src/plugin/compiled_model.cpp
```diff
@@ -179,7 +179,16 @@ void CompiledModel::export_model(std::ostream& model) const {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
     OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");
 
-    cldnn::BinaryOutputBuffer ob(model);
+    const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks);
+
+    // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty.
+    const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
+    std::unique_ptr<cldnn::BinaryOutputBuffer> ob_ptr =
+        encryption_enabled
+            ? cldnn::make_unique<cldnn::EncryptedBinaryOutputBuffer>(model, encryption_callbacks.encrypt)
+            : cldnn::make_unique<cldnn::BinaryOutputBuffer>(model);
+    auto& ob = *ob_ptr;
 
     ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
 
     // Inputs
@@ -222,6 +231,7 @@ void CompiledModel::export_model(std::ostream& model) const {
     }
 
     get_graph(0)->export_model(ob);
+    ob.flush();
 }
 
 std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
```
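Note that with encryption enabled the cache-mode marker written through `ob` above lands in the plaintext buffer and is only emitted, size-prefixed and encrypted, by the final `ob.flush()`; the import path below therefore constructs the matching decrypting buffer before it can read that marker back.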
18 changes: 14 additions & 4 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
```diff
@@ -339,12 +339,21 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,
     config.set_user_property(_orig_config);
     config.apply_user_properties(context_impl->get_engine().get_device_info());
 
-    cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());
+    ov::CacheMode cache_mode = config.get_property(ov::cache_mode);
+    ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks);
+    const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
 
-    ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
-    ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
+    std::unique_ptr<cldnn::BinaryInputBuffer> ib_ptr =
+        encryption_enabled ? cldnn::make_unique<cldnn::EncryptedBinaryInputBuffer>(model,
+                                                                                   context_impl->get_engine(),
+                                                                                   encryption_callbacks.decrypt)
+                           : cldnn::make_unique<cldnn::BinaryInputBuffer>(model, context_impl->get_engine());
+    auto& ib = *ib_ptr;
 
-    if (cache_mode != config.get_property(ov::cache_mode)) {
+    ov::CacheMode loaded_cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
+    ib >> cldnn::make_data(&loaded_cache_mode, sizeof(ov::CacheMode));
+
+    if (loaded_cache_mode != cache_mode) {
         return nullptr;
     }
 
@@ -608,6 +617,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
         ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
         ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW},
+        ov::PropertyName{ov::cache_encryption_callbacks.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::kv_cache_precision.name(), PropertyMutability::RW},
     };
 
```
19 changes: 16 additions & 3 deletions src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
```diff
@@ -13,21 +13,24 @@
 #include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
 #include "common_test_utils/test_common.hpp"
 #include "openvino/pass/serialize.hpp"
+#include "openvino/util/codec_xor.hpp"
 
 namespace {
-typedef std::tuple<bool, ov::element::Type, ov::element::Type> testParams;
+typedef std::tuple<bool, bool, ov::element::Type, ov::element::Type> testParams;
 
 class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface<testParams> {
 public:
     static std::string get_test_case_name(::testing::TestParamInfo<testParams> obj) {
         bool use_compile_model_api_;
+        bool do_encryption_;
         ov::element::Type inference_mode_;
         ov::element::Type model_dtype_;
-        std::tie(use_compile_model_api_, inference_mode_, model_dtype_) = obj.param;
+        std::tie(use_compile_model_api_, do_encryption_, inference_mode_, model_dtype_) = obj.param;
 
         std::ostringstream result;
         const char separator = '_';
         result << "use_compile_model_api=" << use_compile_model_api_ << separator;
+        result << "_do_encryption=" << do_encryption_;
         result << "inference_mode=" << inference_mode_ << separator;
         result << "model_dtype=" << model_dtype_;
         return result.str();
@@ -40,6 +43,7 @@ class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::W
     std::string cache_path;
     std::string cache_dir;
     bool use_compile_model_api;  // for loading from cache
+    bool do_encryption;
     ov::element::Type inference_mode;
     ov::element::Type model_dtype;
 
@@ -55,7 +59,7 @@ void CheckWeightlessCacheAccuracy::SetUp() {
     cache_path = filePrefix + ".blob";
     cache_dir = filePrefix + "_cache_dir";
 
-    std::tie(use_compile_model_api, inference_mode, model_dtype) = GetParam();
+    std::tie(use_compile_model_api, do_encryption, inference_mode, model_dtype) = GetParam();
 }
 
 void CheckWeightlessCacheAccuracy::TearDown() {
@@ -75,6 +79,14 @@ void CheckWeightlessCacheAccuracy::run() {
     ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                            ov::weights_path(bin_path),
                                            ov::hint::inference_precision(inference_mode)};
+
+    if (do_encryption) {
+        ov::EncryptionCallbacks encryption_callbacks;
+        encryption_callbacks.encrypt = ov::util::codec_xor;
+        encryption_callbacks.decrypt = ov::util::codec_xor;
+        config.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+        config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+    }
     auto core = ov::test::utils::PluginCache::get().core();
     ov::pass::Serialize(xml_path, bin_path).run_on_model(model);
 
@@ -150,6 +162,7 @@ const std::vector<ov::element::Type> model_dtypes = {
 INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy,
                          CheckWeightlessCacheAccuracy,
                          ::testing::Combine(::testing::Bool(),
+                                            ::testing::Bool(),
                                             ::testing::ValuesIn(inference_modes),
                                             ::testing::ValuesIn(model_dtypes)),
                          CheckWeightlessCacheAccuracy::get_test_case_name);
```
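The test can assign `ov::util::codec_xor` to both callbacks because XOR with a fixed key is an involution: applying the codec twice restores the original bytes, so one function serves as both encryptor and decryptor. A minimal sketch of that property, assuming only the header and the `std::string(const std::string&)` signature used by the test above:

```cpp
#include <cassert>
#include <string>

#include "openvino/util/codec_xor.hpp"

int main() {
    const std::string blob = "serialized cache blob bytes";
    // Encoding twice with the same XOR codec is the identity,
    // so the encrypt/decrypt round trip restores the input.
    assert(ov::util::codec_xor(ov::util::codec_xor(blob)) == blob);
    return 0;
}
```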
