Skip to content
This repository has been archived by the owner on Jan 3, 2023. It is now read-only.

Commit

Permalink
extend executable create tensor APIs (#4163)
Browse files Browse the repository at this point in the history
* style apply

* update CPUTensorView with memory_pointers

* add wait_to_read and wait_to_write

* remove nullptr default and add second set of APIs

* fix int_executable APIs

Co-authored-by: Scott Cyphers <[email protected]>
  • Loading branch information
ashokei and diyessi authored Jan 29, 2020
1 parent 2651f73 commit b8419c3
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 2 deletions.
46 changes: 44 additions & 2 deletions src/ngraph/runtime/cpu/cpu_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,23 +231,52 @@ shared_ptr<runtime::Tensor> runtime::cpu::CPU_Executable::create_input_tensor(si
parameter->get_shape());
}

shared_ptr<runtime::Tensor> runtime::cpu::CPU_Executable::create_input_tensor(size_t input_index,
                                                                              void* memory_pointer)
{
    // Build a tensor view over the caller-supplied buffer, shaped after the
    // selected input Parameter. The buffer's lifetime is the caller's concern.
    auto param = get_parameter(input_index);
    return make_shared<runtime::cpu::CPUTensorView>(
        param->get_element_type(), param->get_shape(), memory_pointer);
}

shared_ptr<runtime::Tensor> runtime::cpu::CPU_Executable::create_output_tensor(size_t output_index)
{
    // Allocate backend-owned storage shaped after the selected output Result.
    auto res = get_result(output_index);
    return make_shared<runtime::cpu::CPUTensorView>(res->get_element_type(), res->get_shape());
}

shared_ptr<runtime::Tensor> runtime::cpu::CPU_Executable::create_output_tensor(size_t output_index,
                                                                               void* memory_pointer)
{
    // Wrap the caller-supplied buffer as storage for the selected output
    // Result; the buffer must outlive the returned tensor.
    auto res = get_result(output_index);
    return make_shared<runtime::cpu::CPUTensorView>(
        res->get_element_type(), res->get_shape(), memory_pointer);
}

vector<shared_ptr<runtime::Tensor>>
    runtime::cpu::CPU_Executable::create_input_tensor(size_t input_index, size_t pipeline_depth)
{
    // Delegate to the pointer-accepting overload with no user buffers, so
    // every pipeline stage gets backend-allocated storage.
    std::vector<void*> no_buffers;
    return create_input_tensor(input_index, pipeline_depth, no_buffers);
}
// Create one input tensor per pipeline stage for the Parameter at input_index.
// If memory_pointers is non-empty it must supply one buffer per stage; buffer
// lifetimes are the caller's responsibility.
vector<shared_ptr<runtime::Tensor>> runtime::cpu::CPU_Executable::create_input_tensor(
size_t input_index, size_t pipeline_depth, std::vector<void*> memory_pointers)
{
// NOTE(review): declared bool, so any non-empty vector collapses to true (1)
// and the NGRAPH_CHECK below effectively requires pipeline_depth == 1 whenever
// buffers are supplied. This should almost certainly be size_t — TODO confirm.
bool mem_ptr_size = memory_pointers.size();
if (mem_ptr_size > 0)
{
NGRAPH_CHECK(pipeline_depth == mem_ptr_size,
"create_input_tensor mismatch in pipeline_depth and memory_pointers");
}
vector<shared_ptr<runtime::cpu::CPUTensorView>> tensors;
shared_ptr<op::Parameter> parameter = get_parameter(input_index);
for (size_t i = 0; i < pipeline_depth; i++)
{
shared_ptr<runtime::cpu::CPUTensorView> tensor;
// NOTE(review): the next lines are diff residue from the scraped commit page —
// the pre-image line `parameter->get_shape());` is interleaved with the
// post-image three-argument call; only the post-image form is real code.
auto t = make_shared<runtime::cpu::CPUTensorView>(parameter->get_element_type(),
parameter->get_shape());
parameter->get_shape(),
mem_ptr_size > 0 ? memory_pointers[i]
: nullptr);
tensor = static_pointer_cast<runtime::cpu::CPUTensorView>(t);
tensors.push_back(tensor);
}
Expand All @@ -262,13 +291,26 @@ vector<shared_ptr<runtime::Tensor>>
vector<shared_ptr<runtime::Tensor>>
    runtime::cpu::CPU_Executable::create_output_tensor(size_t output_index, size_t pipeline_depth)
{
    // Delegate to the pointer-accepting overload with no user buffers, so
    // every pipeline stage gets backend-allocated storage.
    std::vector<void*> no_buffers;
    return create_output_tensor(output_index, pipeline_depth, no_buffers);
}
// Create one output tensor per pipeline stage for the Result at output_index.
// If memory_pointers is non-empty it must supply one buffer per stage; buffer
// lifetimes are the caller's responsibility.
vector<shared_ptr<runtime::Tensor>> runtime::cpu::CPU_Executable::create_output_tensor(
size_t output_index, size_t pipeline_depth, std::vector<void*> memory_pointers)
{
// NOTE(review): declared bool, so any non-empty vector collapses to true (1)
// and the NGRAPH_CHECK below effectively requires pipeline_depth == 1 whenever
// buffers are supplied. This should almost certainly be size_t — TODO confirm.
bool mem_ptr_size = memory_pointers.size();
if (mem_ptr_size > 0)
{
NGRAPH_CHECK(pipeline_depth == mem_ptr_size,
"create_output_tensor mismatch in pipeline_depth and memory_pointers");
}
vector<shared_ptr<runtime::cpu::CPUTensorView>> tensors;
shared_ptr<op::Result> result = get_result(output_index);
for (size_t i = 0; i < pipeline_depth; i++)
{
shared_ptr<runtime::cpu::CPUTensorView> tensor;
// NOTE(review): the next lines are diff residue from the scraped commit page —
// the pre-image line `result->get_shape());` is interleaved with the
// post-image three-argument call; only the post-image form is real code.
auto t = make_shared<runtime::cpu::CPUTensorView>(result->get_element_type(),
result->get_shape());
result->get_shape(),
mem_ptr_size > 0 ? memory_pointers[i]
: nullptr);
tensor = static_pointer_cast<runtime::cpu::CPUTensorView>(t);
tensors.push_back(tensor);
}
Expand Down
16 changes: 16 additions & 0 deletions src/ngraph/runtime/cpu/cpu_backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,30 @@ namespace ngraph

std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index) override;

std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index,
void* memory_pointer) override;

std::shared_ptr<runtime::Tensor> create_output_tensor(size_t output_index) override;

std::shared_ptr<runtime::Tensor>
create_output_tensor(size_t output_index, void* memory_pointer) override;

std::vector<std::shared_ptr<runtime::Tensor>>
create_input_tensor(size_t input_index,
size_t pipeline_depth,
std::vector<void*> memory_pointers) override;

std::vector<std::shared_ptr<runtime::Tensor>>
create_input_tensor(size_t input_index, size_t pipeline_depth) override;

std::vector<std::shared_ptr<runtime::Tensor>>
create_output_tensor(size_t output_index, size_t pipeline_depth) override;

std::vector<std::shared_ptr<runtime::Tensor>>
create_output_tensor(size_t output_index,
size_t pipeline_depth,
std::vector<void*> memory_pointers) override;

private:
std::shared_ptr<ngraph::op::Parameter> get_parameter(size_t index) const;
std::shared_ptr<ngraph::op::Result> get_result(size_t index) const;
Expand Down
24 changes: 24 additions & 0 deletions src/ngraph/runtime/executable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,20 +145,44 @@ shared_ptr<runtime::Tensor> runtime::Executable::create_input_tensor(size_t /* i
throw runtime_error("create_input_tensor unimplemented");
}

// Default implementation of the buffer-accepting input-tensor factory;
// backends that support caller-provided storage override this.
shared_ptr<runtime::Tensor> runtime::Executable::create_input_tensor(size_t /*input_index*/,
                                                                     void* /*memory_pointer*/)
{
    throw runtime_error("create_input_tensor unimplemented");
}

// Default implementation of the output-tensor factory; backends that can
// allocate output storage override this.
shared_ptr<runtime::Tensor> runtime::Executable::create_output_tensor(size_t /*output_index*/)
{
    throw runtime_error("create_output_tensor unimplemented");
}

// Default implementation of the buffer-accepting output-tensor factory;
// backends that support caller-provided storage override this.
shared_ptr<runtime::Tensor> runtime::Executable::create_output_tensor(size_t /*output_index*/,
                                                                      void* /*memory_pointer*/)
{
    throw runtime_error("create_output_tensor unimplemented");
}

// Default implementation of the pipelined input-tensor factory; backends that
// support input pipelining override this.
vector<shared_ptr<runtime::Tensor>>
    runtime::Executable::create_input_tensor(size_t /*input_index*/, size_t /*pipeline_depth*/)
{
    throw runtime_error("create_input_tensor unimplemented");
}

// Default implementation of the pipelined, buffer-accepting input-tensor
// factory; backends that support both features override this.
vector<shared_ptr<runtime::Tensor>> runtime::Executable::create_input_tensor(
    size_t /*input_index*/, size_t /*pipeline_depth*/, std::vector<void*> /*memory_pointers*/)
{
    throw runtime_error("create_input_tensor unimplemented");
}

// Default implementation of the pipelined output-tensor factory; backends that
// support output pipelining override this.
vector<shared_ptr<runtime::Tensor>>
    runtime::Executable::create_output_tensor(size_t /*output_index*/, size_t /*pipeline_depth*/)
{
    throw runtime_error("create_output_tensor unimplemented");
}

// Default implementation of the pipelined, buffer-accepting output-tensor
// factory; backends that support both features override this.
vector<shared_ptr<runtime::Tensor>> runtime::Executable::create_output_tensor(
    size_t /*output_index*/, size_t /*pipeline_depth*/, std::vector<void*> /*memory_pointers*/)
{
    throw runtime_error("create_output_tensor unimplemented");
}
44 changes: 44 additions & 0 deletions src/ngraph/runtime/executable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,32 @@ class NGRAPH_API ngraph::runtime::Executable
/// \returns A Tensor
virtual std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index);

/// \brief Create an input Tensor
/// \param input_index The index position in the input Parameter vector. This would be the same
/// order of Parameters passed into the inputs in the call() method.
/// \param memory_pointer A pointer to a buffer used for this tensor. The size of the buffer
/// must be sufficient to contain the tensor. The lifetime of the buffer is the
/// responsibility of the caller and must outlive the created Tensor.
/// \returns A Tensor
virtual std::shared_ptr<runtime::Tensor> create_input_tensor(size_t input_index,
void* memory_pointer);

/// \brief Create an output Tensor
/// \param output_index The index position in the output Result vector. This would be the same
/// order of Results passed into the outputs in the call() method.
/// \returns A Tensor
virtual std::shared_ptr<runtime::Tensor> create_output_tensor(size_t output_index);

/// \brief Create an output Tensor
/// \param output_index The index position in the output Result vector. This would be the same
/// order of Results passed into the outputs in the call() method.
/// \param memory_pointer A pointer to a buffer used for this tensor. The size of the buffer
/// must be sufficient to contain the tensor. The lifetime of the buffer is the
/// responsibility of the caller and must outlive the created Tensor.
/// \returns A Tensor
virtual std::shared_ptr<runtime::Tensor> create_output_tensor(size_t output_index,
void* memory_pointer);

/// \brief Create a vector of input Tensors
/// \param input_index The index position in the input Parameter vector. This would be the same
/// order of Parameters passed into the inputs in the call() method.
Expand All @@ -106,6 +126,18 @@ class NGRAPH_API ngraph::runtime::Executable
virtual std::vector<std::shared_ptr<runtime::Tensor>>
create_input_tensor(size_t input_index, size_t pipeline_depth);

/// \brief Create a vector of input Tensors
/// \param input_index The index position in the input Parameter vector. This would be the same
/// order of Parameters passed into the inputs in the call() method.
/// \param pipeline_depth The number of stages in the input pipeline. For double-buffered input
/// you would specify pipeline_depth=2
/// \param memory_pointers A vector of pointers to buffers used for these tensors. The size of
/// each buffer must be sufficient to contain the corresponding tensor. The lifetime of the
/// buffers is the responsibility of the caller and must outlive the created Tensors.
/// \returns A vector of Tensors, one for each stage of the pipeline
virtual std::vector<std::shared_ptr<runtime::Tensor>> create_input_tensor(
size_t input_index, size_t pipeline_depth, std::vector<void*> memory_pointers);

/// \brief Create a vector of output Tensors
/// \param output_index The index position in the output Result vector. This would be the same
/// order of Results passed into the outputs in the call() method.
Expand All @@ -115,6 +147,18 @@ class NGRAPH_API ngraph::runtime::Executable
virtual std::vector<std::shared_ptr<runtime::Tensor>>
create_output_tensor(size_t output_index, size_t pipeline_depth);

/// \brief Create a vector of output Tensors
/// \param output_index The index position in the output Result vector. This would be the same
/// order of Results passed into the outputs in the call() method.
/// \param pipeline_depth The number of stages in the output pipeline. For double-buffered
/// output you would specify pipeline_depth=2
/// \param memory_pointers A vector of pointers to buffers used for these tensors. The size of
/// each buffer must be sufficient to contain the corresponding tensor. The lifetime of the
/// buffers is the responsibility of the caller and must outlive the created Tensors.
/// \returns A vector of Tensors, one for each stage of the pipeline
virtual std::vector<std::shared_ptr<runtime::Tensor>> create_output_tensor(
size_t output_index, size_t pipeline_depth, std::vector<void*> memory_pointers);

protected:
/// \brief Called at the end of compile to the values to be returned by get_parameters
/// and get_results
Expand Down
6 changes: 6 additions & 0 deletions src/ngraph/runtime/tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ namespace ngraph
/// \param n Number of bytes to read, must be integral number of elements.
virtual void read(void* p, size_t n) const = 0;

/// \brief Check the tensor for new data before reading; the call may block.
/// Backends may use this to ensure the tensor is up to date (e.g. lazy
/// evaluation). The default implementation is a no-op.
virtual void wait_for_read_ready() {}
/// \brief Notify the tensor that new data is being written; the call may block.
/// Backends may use this as an indication of new data in the tensor. The
/// default implementation is a no-op.
virtual void wait_for_write_ready() {}
/// \brief copy bytes directly from source to this tensor
/// \param source The source tensor
virtual void copy_from(const ngraph::runtime::Tensor& source) NGRAPH_DEPRECATED(
Expand Down

0 comments on commit b8419c3

Please sign in to comment.