From 1e3ba876bf73e844afd22a2fc65a9f77adff2193 Mon Sep 17 00:00:00 2001
From: Ysobel Sims <35280100+ysims@users.noreply.github.com>
Date: Thu, 7 Jul 2022 14:54:37 +1000
Subject: [PATCH] Add ability to cache built binaries in OpenCL engine (#11)

---
Review notes on this revision of the patch:
 - an empty cache_directory now disables caching instead of resolving to "/<hash>.bin"
 - cached binaries are opened in binary mode
 - save_binary checks the clGetProgramInfo results and sizes the CL_PROGRAM_BINARIES query by the pointer array
 - the three context #include lines of the first hunk lost their header names in extraction and are left as found

 cpp/visualmesh/engine/opencl/engine.hpp | 168 ++++++++++++++++++++----
 1 file changed, 143 insertions(+), 25 deletions(-)

diff --git a/cpp/visualmesh/engine/opencl/engine.hpp b/cpp/visualmesh/engine/opencl/engine.hpp
index 1688791..9c7989f 100644
--- a/cpp/visualmesh/engine/opencl/engine.hpp
+++ b/cpp/visualmesh/engine/opencl/engine.hpp
@@ -21,6 +21,7 @@
 // If OpenCL is disabled then don't provide this file
 #if !defined(VISUALMESH_DISABLE_OPENCL)
 
+#include <fstream>
 #include
 #include
 #include
@@ -63,56 +64,175 @@ namespace engine {
             // OpenCL ::clSetKernelArg functions take the sizeof a pointer as their argument, this is correct
             static constexpr size_t MEM_SIZE = sizeof(cl_mem);
 
-        public:
             /**
-             * @brief Construct a new OpenCL Engine object
+             * @brief Load an OpenCL binary from a file and build it
+             *
+             * @param binary_path path to load the binary file from
+             * @param device OpenCL device id
              *
-             * @param structure the network structure to use classification
+             * @throws std::runtime_error if the binary file cannot be opened or read
              */
-            Engine(const NetworkStructure& structure = {}) {
+            void load_binary(const std::string& binary_path, cl_device_id& device) {
+                // A missing cache file is reported by throwing so the caller can fall back to building from source
+                std::ifstream read_binary(binary_path, std::ios::in | std::ios::binary);
+                if (!read_binary) { throw std::runtime_error("Failed to read from precompiled OpenCL binary."); }
+
+                // Error flag to check if any OpenCL functions fail
+                cl_int error = CL_SUCCESS;
+
+                // Get the length
+                read_binary.seekg(0, read_binary.end);
+                size_t binary_size = read_binary.tellg();
+                read_binary.seekg(0, read_binary.beg);
+
+                // Read the binary file
+                std::vector<char> binary_load(binary_size, 0);
+                read_binary.read(binary_load.data(), binary_size);
+                read_binary.close();
+                if (!read_binary) { throw std::runtime_error("Failed to read from precompiled OpenCL binary."); }
+
+                // Create the program and build using the loaded binary
+                cl_int binary_status = CL_SUCCESS;
+                const unsigned char* binary_ptr = reinterpret_cast<const unsigned char*>(binary_load.data());
+
+                program = cl::program(
+                  ::clCreateProgramWithBinary(context, 1, &device, &binary_size, &binary_ptr, &binary_status, &error),
+                  ::clReleaseProgram);
+                throw_cl_error(error, "Failed to create program from binary");
-                // Create the OpenCL context and command queue
-                cl_int error = CL_SUCCESS;
-                cl_device_id device = nullptr;
-                std::tie(context, device) = operation::make_context();
-                queue = operation::make_queue(context, device);
+                error = ::clBuildProgram(program,
+                                         1,
+                                         &device,
+                                         "-cl-single-precision-constant -cl-fast-relaxed-math -cl-mad-enable",
+                                         nullptr,
+                                         nullptr);
 
-                // Get program sources (this does concatenated strings)
-                std::stringstream sources;
-                sources << operation::get_scalar_defines(Scalar(0.0));
-                sources << PROJECT_EQUIDISTANT_CL;
-                sources << PROJECT_EQUISOLID_CL;
-                sources << PROJECT_RECTILINEAR_CL;
-                sources << LOAD_IMAGE_CL;
-                sources << operation::make_network(structure);
+                // If it didn't work, log and throw an error
+                if (error != CL_SUCCESS) {
+                    // Get program build log
+                    size_t used = 0;
+                    ::clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &used);
+                    std::vector<char> log(used);
+                    ::clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log.size(), log.data(), &used);
+                    // Throw an error with the build log
+                    throw_cl_error(error,
+                                   "Error building OpenCL program\n" + std::string(log.begin(), log.begin() + used));
+                }
+            }
 
-                std::string source = sources.str();
-                const char* cstr = source.c_str();
-                size_t csize = source.size();
+            /**
+             * @brief Build the OpenCL program
+             *
+             * @param device OpenCL device id
+             * @param source OpenCL source information
+             */
+            void build_from_source(cl_device_id& device, const std::string& source) {
+                // Error flag to check if any OpenCL functions fail
+                cl_int error = CL_SUCCESS;
 
+                // Create the program and build
+                const char* cstr = source.c_str();
+                size_t csize = source.size();
                 program =
                   cl::program(::clCreateProgramWithSource(context, 1, &cstr, &csize, &error), ::clReleaseProgram);
                 throw_cl_error(error, "Error adding sources to OpenCL program");
 
-                // Compile the program
                 error = ::clBuildProgram(program,
                                          0,
                                          nullptr,
                                          "-cl-single-precision-constant -cl-fast-relaxed-math -cl-mad-enable",
                                          nullptr,
                                          nullptr);
+
+                // If it didn't work, log and throw an error
                 if (error != CL_SUCCESS) {
                     // Get program build log
                     size_t used = 0;
                     ::clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &used);
                     std::vector<char> log(used);
                     ::clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log.size(), log.data(), &used);
-
                     // Throw an error with the build log
                     throw_cl_error(error,
                                    "Error building OpenCL program\n" + std::string(log.begin(), log.begin() + used));
                 }
+            }
+
+            /**
+             * @brief Save the current OpenCL program in a binary file
+             *
+             * Saving is best effort, if the binary cannot be retrieved from OpenCL nothing is written
+             *
+             * @param binary_path path to save the binary file to
+             */
+            void save_binary(const std::string& binary_path) {
+
+                // Get the size of the binary to save
+                size_t binary_size{};
+                cl_int error =
+                  ::clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(binary_size), &binary_size, nullptr);
+                if (error != CL_SUCCESS || binary_size == 0) { return; }
+
+                // Get the data to save
+                std::vector<char> binary_save(binary_size, 0);
+                // CL_PROGRAM_BINARIES writes through an array of buffer pointers (one per device), so pass the
+                // address of an lvalue pointer to our buffer and the size of that pointer array
+                char* binary_ptr = binary_save.data();
+                error = ::clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary_ptr), &binary_ptr, nullptr);
+                if (error != CL_SUCCESS) { return; }
+
+                // Write to the file and close the file
+                std::ofstream write_binary(binary_path, std::ofstream::binary);
+                write_binary.write(binary_save.data(), binary_save.size());
+                write_binary.close();
+            }
+
+        public:
+            /**
+             * @brief Construct a new OpenCL Engine object
+             *
+             * @param structure the network structure to use classification
+             * @param cache_directory directory to save/load the compiled OpenCL binary, caching is disabled when empty
+             */
+            Engine(const NetworkStructure& structure = {}, const std::string& cache_directory = "") {
+                // Create the OpenCL context and command queue
+                cl_int error = CL_SUCCESS;
+                cl_device_id device = nullptr;
+                std::tie(context, device) = operation::make_context();
+                queue = operation::make_queue(context, device);
+
+                // Get program sources (this does concatenated strings)
+                std::stringstream sources;
+                sources << operation::get_scalar_defines(Scalar(0.0));
+                sources << PROJECT_EQUIDISTANT_CL;
+                sources << PROJECT_EQUISOLID_CL;
+                sources << PROJECT_RECTILINEAR_CL;
+                sources << LOAD_IMAGE_CL;
+                sources << operation::make_network(structure);
+
+                std::string source = sources.str();
+
+                // Without a cache directory the binary path would resolve to the filesystem root, so skip caching
+                if (cache_directory.empty()) { build_from_source(device, source); }
+                else {
+                    // The hash of the sources represents the name of the OpenCL compiled program binary file, so
+                    // that a new binary will be created for different sources
+                    const std::size_t source_hash = std::hash<std::string>{}(source);
+
+                    // If the compiled binary exists, read it
+                    const std::string binary_path = cache_directory + "/" + std::to_string(source_hash) + ".bin";
+
+                    // Try to read the binary
+                    try {
+                        load_binary(binary_path, device);
+                    }
+                    // The compiled binary doesn't exist or could not be used, create it
+                    catch (const std::exception& /* e */) {
+                        build_from_source(device, source);
+                        save_binary(binary_path);
+                    }
+                }
+
                 // Get the kernels
                 project_rectilinear =
                   cl::kernel(::clCreateKernel(program, "project_rectilinear", &error), ::clReleaseKernel);
                 throw_cl_error(error, "Error getting project_rectilinear kernel");
@@ -513,9 +633,7 @@ namespace engine {
                     // Cache for future runs
                     device_points_cache[&mesh] = cl_points;
                 }
-                else {
-                    cl_points = device_mesh->second;
-                }
+                else { cl_points = device_mesh->second; }
 
                 // First count the size of the buffer we will need to allocate
                 int n_points = 0;