Runtime-configurable symbolic kernel

See #205
ddemidov · May 7, 2016 · e2fad48 · e2fad48
1 parent cb6f9eb
commit e2fad48
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 110 deletions.
diff --git a/vexcl/backend/compute/kernel.hpp b/vexcl/backend/compute/kernel.hpp
@@ -206,6 +206,11 @@ class kernel {
         /// Get reference to the underlying object.
         const boost::compute::kernel& get() const { return K; }
         boost::compute::kernel& get() { return K; }
+
+        /// Reset argument counter (sets argpos back to zero).
+        void reset() {
+            argpos = 0;
+        }
     private:
         unsigned argpos;
 

diff --git a/vexcl/backend/cuda/kernel.hpp b/vexcl/backend/cuda/kernel.hpp
@@ -137,8 +137,7 @@ class kernel {
                         )
                     );
 
-            stack.clear();
-            prm_pos.clear();
+            reset();
         }
 
 #ifndef BOOST_NO_VARIADIC_TEMPLATES
@@ -206,6 +205,12 @@ class kernel {
 
         /// Get the underlying CUfunction
         CUfunction get() const { return K; }
+
+        /// Reset pushed arguments (clears both stack and prm_pos).
+        void reset() {
+            stack.clear();
+            prm_pos.clear();
+        }
     private:
         context ctx;
         program P;

diff --git a/vexcl/backend/opencl/kernel.hpp b/vexcl/backend/opencl/kernel.hpp
@@ -231,6 +231,11 @@ class kernel {
         /// Get reference to the underlying object.
         const cl::Kernel& get() const { return K; }
         cl::Kernel& get() { return K; }
+
+        /// Reset argument counter (sets argpos back to zero).
+        void reset() {
+            argpos = 0;
+        }
     private:
         unsigned argpos;
 

diff --git a/vexcl/generator.hpp b/vexcl/generator.hpp
@@ -502,38 +502,34 @@ std::ostream& operator<<(std::ostream &os, const symbolic<T> &sym) {
 namespace generator {
 
 /// Autogenerated kernel.
-template <size_t NP>
-class Kernel {
+class kernel {
     public:
-        template <class ArgTuple>
-        Kernel(
+        kernel(
                 const std::vector<backend::command_queue> &queue,
-                const std::string &name, const std::string &body,
-                const ArgTuple& args
-              ) : queue(queue)
-        {
-            static_assert(
-                    boost::fusion::result_of::size<ArgTuple>::value == NP,
-                    "Wrong number of kernel parameters"
-                    );
+                const std::string &name
+              ) : queue(queue), name(name), psize(queue.size(), 0) // one zeroed size slot per queue
+        {} // Nothing is generated here; source generation happens in build().
+
+        template <class SymVar> // Records the source fragments contributed by one kernel parameter.
+        void add_param(const SymVar &var) { // Only accumulates text; build() splices it into the kernel source.
+            prm_decl << "\t" << var.prmdecl() << ",\n"; // entry for the kernel parameter list
+            prm_read << var.init();  // emitted before the kernel body
+            prm_save << var.write(); // emitted after the kernel body
+        }
 
+        void build(const std::string &body) {
             for(auto q = queue.begin(); q != queue.end(); q++) {
                 backend::source_generator source(*q);
 
                 source << get_preamble().str();
 
-                source.kernel(name).open("(")
-                    .parameter<size_t>("n");
+                source.kernel(name).open("(");
 
-                boost::fusion::for_each(args, declare_params(source));
+                source << prm_decl.str() << "\t" << type_name<size_t>() << " n";
 
                 source.close(")").open("{").grid_stride_loop().open("{");
 
-                boost::fusion::for_each(args, read_params(source));
-
-                source << body;
-
-                boost::fusion::for_each(args, write_params(source));
+                source.new_line() << prm_read.str() << body << prm_save.str();
 
                 source.close("}").close("}");
 
@@ -545,117 +541,85 @@ class Kernel {
             }
         }
 
+        template <class T> // Generic case: the same value v is pushed for every queue.
+        void push_arg(const T &v) {
+            for(unsigned d = 0; d < queue.size(); d++) {
+                cache.find(backend::get_context_id(queue[d]))->second.push_arg(v); // unchecked lookup; presumably build() filled the cache for this context -- TODO confirm
+            }
+        }
+
+        template <class T> // Overload for vectors: queue d receives v(d) rather than v itself.
+        void push_arg(const vector<T> &v) {
+            for(unsigned d = 0; d < queue.size(); d++) {
+                cache.find(backend::get_context_id(queue[d]))->second.push_arg(v(d)); // unchecked lookup; presumably build() filled the cache -- TODO confirm
+                psize[d] = std::max(psize[d], v.part_size(d)); // keep the largest part size seen; operator()() passes it to the kernel
+            }
+        }
+
+        void operator()() { // Launches on every queue using the arguments pushed beforehand.
+            for(unsigned d = 0; d < queue.size(); d++) {
+                auto &K = cache.find(backend::get_context_id(queue[d]))->second; // unchecked lookup; presumably build() filled the cache -- TODO confirm
+
+                if (psize[d]) { // nonzero only if push_arg(vector) recorded a part size for this queue
+                    K.push_arg(psize[d]); // size goes last, matching the trailing `n` parameter emitted by build()
+                    K(queue[d]);
+
+                    psize[d] = 0; // forget the size so the next round of push_arg calls starts from zero
+                } else {
+                    K.reset(); // nothing to run on this queue: reset the arguments already pushed
+                }
+            }
+        }
+
 #ifndef BOOST_NO_VARIADIC_TEMPLATES
         /// Launches the kernel with the provided parameters.
         template <class... Param>
         void operator()(const Param&... param) {
-            launch(boost::fusion::vector_tie(param...));
+            boost::fusion::for_each(boost::fusion::vector_tie(param...), push_args(*this)); // push_arg() each parameter
+            (*this)(); // then launch through the nullary overload
         }
 #else
 
 #define VEXCL_FUNCALL_OPERATOR(z, n, data)                                     \
   template <BOOST_PP_ENUM_PARAMS(n, class Param)>                              \
   void operator()(BOOST_PP_ENUM_BINARY_PARAMS(n, const Param, &param)) {       \
-    launch(boost::fusion::vector_tie(BOOST_PP_ENUM_PARAMS(n, param)));         \
+    boost::fusion::for_each(                                                   \
+            boost::fusion::vector_tie(BOOST_PP_ENUM_PARAMS(n, param)),         \
+            push_args(*this) /* push_arg() each parameter */                   \
+            );                                                                 \
+    (*this)(); /* then launch through the nullary overload */                  \
   }
 
 BOOST_PP_REPEAT_FROM_TO(1, VEXCL_MAX_ARITY, VEXCL_FUNCALL_OPERATOR, ~)
 
 #undef VEXCL_FUNCALL_OPERATOR
 
 #endif
-    private:
-
-        template <class ParamTuple>
-        void launch(const ParamTuple &param) {
-            static_assert(
-                    boost::fusion::result_of::size<ParamTuple>::value == NP,
-                    "Wrong number of kernel parameters"
-                    );
-
-            for(unsigned d = 0; d < queue.size(); d++) {
-                if (size_t psize = boost::fusion::fold(param, 0, param_size(d))) {
-                    auto key = backend::get_context_id(queue[d]);
-                    auto krn = cache.find(key);
-                    krn->second.push_arg(psize);
-
-                    set_params setprm(krn->second, d);
-                    boost::fusion::for_each(param, setprm);
-
-                    krn->second(queue[d]);
-                }
-            }
-        }
-
-        struct declare_params {
-            backend::source_generator &src;
 
-            declare_params(backend::source_generator &src) : src(src) {}
+        struct add_params { // Functor that forwards each visited element to kernel::add_param().
+            kernel &K;
 
-            template <class T>
-            void operator()(const T &v) const {
-                src << ",\n\t" << v.prmdecl();
-            }
-        };
-
-        struct read_params {
-            backend::source_generator &src;
-
-            read_params(backend::source_generator &src) : src(src) {}
-
-            template <class T>
-            void operator()(const T &v) const {
-                src << v.init();
-            }
-        };
-
-        struct write_params {
-            backend::source_generator &src;
-
-            write_params(backend::source_generator &src) : src(src) {}
+            add_params(kernel &K) : K(K) {}
 
             template <class T>
             void operator()(const T &v) const {
-                src << v.write();
+                K.add_param(v); // records the declaration/read/write text for v on the kernel
             }
         };
-
-        struct set_params {
-            backend::kernel &krn;
-            unsigned d;
-
-            set_params(backend::kernel &krn, unsigned d)
-                : krn(krn), d(d) {};
-
-            template <class T>
-            void operator()(const T &v) const {
-                krn.push_arg(v);
-            }
-            template <class T>
-            void operator()(const vector<T> &v) const {
-                krn.push_arg(v(d));
-            }
-        };
-
+    private:
         std::vector<backend::command_queue> queue;
-
+        std::string name;
+        std::vector<size_t> psize;
+        std::ostringstream prm_decl, prm_read, prm_save;
         std::map<vex::backend::context_id, vex::backend::kernel> cache;
 
-        struct param_size {
-            unsigned device;
-
-            param_size(unsigned device) : device(device) {}
-
-            typedef size_t result_type;
-
-            template <class T>
-            size_t operator()(size_t s, const T&) const {
-                return s;
-            }
+        struct push_args { // Functor that forwards each visited element to kernel::push_arg().
+            kernel &K;
+            push_args(kernel &K) : K(K) {}
 
             template <class T>
-            size_t operator()(size_t s, const vector<T> &v) const {
-                return std::max(s, v.part_size(device));
+            void operator()(const T &p) const {
+                K.push_arg(p); // overload resolution on kernel::push_arg picks the vector or generic version
             }
         };
 };
@@ -700,12 +664,18 @@ class Function {
  * arguments.
  */
 template <class... Args>
-auto build_kernel(
+kernel build_kernel(
         const std::vector<backend::command_queue> &queue,
         const std::string &name, const std::string& body, const Args&... args
-        ) -> Kernel<sizeof...(Args)>
+        )
 {
-    return Kernel<sizeof...(Args)>(queue, name, body, boost::fusion::vector_tie(args...));
+    kernel K(queue, name); // starts with no parameters registered
+    boost::fusion::for_each(
+            boost::fusion::vector_tie(args...),
+            kernel::add_params(K) // calls K.add_param(arg) for each arg
+            );
+    K.build(body); // generate the source once every parameter has been added
+    return K;
 }
 
 /// Builds function body from the recorded expression.
@@ -721,11 +691,15 @@ std::string make_function(std::string body, const Ret &ret, const Args&... args)
 
 #define VEXCL_BUILD_KERNEL(z, n, data)                                         \
   template <BOOST_PP_ENUM_PARAMS(n, class Arg)>                                \
-  Kernel<n> build_kernel(const std::vector<backend::command_queue> & queue,    \
+  kernel build_kernel(const std::vector<backend::command_queue> & queue,       \
                          const std::string & name, const std::string & body,   \
                          BOOST_PP_ENUM_BINARY_PARAMS(n, const Arg, &arg)) {    \
-    return Kernel<n>(queue, name, body,                                        \
-                     boost::fusion::vector_tie(BOOST_PP_ENUM_PARAMS(n, arg))); \
+    kernel K(queue, name);                                                     \
+    boost::fusion::for_each(                                                   \
+            boost::fusion::vector_tie(BOOST_PP_ENUM_PARAMS(n, arg)),           \
+            kernel::add_params(K)); /* as in the variadic overload */          \
+    K.build(body);                                                             \
+    return K;                                                                  \
   }
 
 #define VEXCL_MAKE_FUNCTION(z, n, data)                                        \