diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt index 96d12f435..b32fe92a5 100644 --- a/test_conformance/api/CMakeLists.txt +++ b/test_conformance/api/CMakeLists.txt @@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES test_queries.cpp test_create_kernels.cpp test_kernels.cpp + test_kernel_local_memory_size.cpp test_kernel_private_memory_size.cpp test_api_min_max.cpp test_kernel_arg_changes.cpp diff --git a/test_conformance/api/main.cpp b/test_conformance/api/main.cpp index 5b4e800c6..abdd026e4 100644 --- a/test_conformance/api/main.cpp +++ b/test_conformance/api/main.cpp @@ -163,7 +163,7 @@ test_definition test_list[] = { ADD_TEST_VERSION(negative_create_command_queue_with_properties, Version(2, 0)), ADD_TEST(negative_create_command_queue_with_properties_khr), - ADD_TEST(kernel_local_mem_size), + ADD_TEST(kernel_local_memory_size), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/api/procs.h b/test_conformance/api/procs.h index adba611bd..780b39de8 100644 --- a/test_conformance/api/procs.h +++ b/test_conformance/api/procs.h @@ -214,8 +214,10 @@ extern int test_consistency_requirements_fp16(cl_device_id deviceID, extern int test_min_image_formats(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_kernel_local_mem_size(cl_device_id deviceID, cl_context context, - cl_command_queue queue, int num_elements); +extern int test_kernel_local_memory_size(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); extern int test_negative_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, diff --git a/test_conformance/api/test_api_consistency.cpp b/test_conformance/api/test_api_consistency.cpp index b1dc20abf..c003b868c 100644 --- a/test_conformance/api/test_api_consistency.cpp +++ b/test_conformance/api/test_api_consistency.cpp @@ -18,6 +18,7 @@ #include "testBase.h" #include "harness/testHarness.h" #include "harness/deviceInfo.h" +#include static const char* test_kernel = R"CLC( __kernel void test(__global int* dst) { @@ -25,34 +26,6 @@ __kernel void test(__global int* dst) { } )CLC"; -const char* empty_kernel[] = { "__kernel void empty_kernel()\n" - "{\n" - "}\n" }; - -const char* local_memory_kernel[] = { - "__kernel void local_memory_kernel(__local int* ptr)\n" - "{\n" - "__local float array[10000];\n" - "for(int i = 0; i<10000; i++)\n" - " array[i]*=2;\n" - "}\n" -}; - -const char* local_param_kernel[] = { - "__kernel void local_param_kernel(__local int* ptr)\n" - "{\n" - "}\n" -}; - -const char* local_param_local_memory_kernel[] = { - "__kernel void local_param_local_memory_kernel(__local int* ptr)\n" - "{\n" - "__local float array[10000];\n" - "for(int i = 0; i<10000; i++)\n" - " array[i]*=2;\n" - "}\n" -}; - int test_consistency_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { @@ -1180,106 +1153,6 @@ int test_consistency_3d_image_writes(cl_device_id deviceID, cl_context context, return TEST_PASS; } -int test_kernel_local_mem_size(cl_device_id deviceID, cl_context context, - cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel; - - // Check memory needed to execute empty kernel - if (create_single_kernel_helper(context, &program, &kernel, 1, empty_kernel, - "empty_kernel") - != 0) - { - return -1; - } - - cl_ulong kernelLocalUsage = 0; - error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, - sizeof(kernelLocalUsage), - &kernelLocalUsage, NULL); - test_error(error, - "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); - - test_assert_error(kernelLocalUsage > 0, "kernel local mem size failed"); - - // Check memory needed to execute empty kernel with __local variable - if (create_single_kernel_helper(context, &program, &kernel, 1, - local_memory_kernel, "local_memory_kernel") - != 0) - { - return -1; - } - - kernelLocalUsage = 0; - error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, - sizeof(kernelLocalUsage), - &kernelLocalUsage, NULL); - test_error(error, - "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); - - test_assert_error(kernelLocalUsage >= 10000 * sizeof(cl_float), - "kernel local mem size failed"); - - // Check memory needed to execute empty kernel with __local parameter with - // setKernelArg - if (create_single_kernel_helper(context, &program, &kernel, 1, - local_param_kernel, "local_param_kernel") - != 0) - { - return -1; - } - - size_t elements = 100; - size_t sizeToAllocate = elements * sizeof(cl_int); - int* localData = (cl_int*)malloc(sizeToAllocate); - for (size_t i = 0; i < elements; i++) - { - localData[i] = i; - } - error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL); - test_error(error, "Unable to set indexed kernel arguments"); - - kernelLocalUsage = 0; - error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, - sizeof(kernelLocalUsage), - &kernelLocalUsage, NULL); - test_error(error, - "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); - - test_assert_error(kernelLocalUsage >= sizeToAllocate, - "kernel local mem size failed"); - - // Check memory needed to execute kernel with __local variable and __local - // parameter with setKernelArg - if (create_single_kernel_helper(context, &program, &kernel, 1, - local_param_local_memory_kernel, - "local_param_local_memory_kernel") - != 0) - { - return -1; - } - - error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL); - test_error(error, "Unable to set indexed kernel arguments"); - - kernelLocalUsage = 0; - error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, - sizeof(kernelLocalUsage), - &kernelLocalUsage, NULL); - test_error(error, - "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); - - test_assert_error(kernelLocalUsage - >= sizeToAllocate + 10000 * sizeof(cl_float), - "kernel local mem size failed"); - - free(localData); - - return CL_SUCCESS; -} - int test_consistency_requirements_fp64(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) diff --git a/test_conformance/api/test_kernel_local_memory_size.cpp b/test_conformance/api/test_kernel_local_memory_size.cpp new file mode 100644 index 000000000..0e38a583c --- /dev/null +++ b/test_conformance/api/test_kernel_local_memory_size.cpp @@ -0,0 +1,259 @@ +// +// Copyright (c) 2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include + +#include "testBase.h" +#include "harness/testHarness.h" +#include + +static const char* empty_kernel = R"CLC( +__kernel void empty_kernel() { +} +)CLC"; + +static const char* local_memory_kernel = R"CLC( +__kernel void local_memory_kernel(global int* data) { + __local int array[10]; + + size_t id = get_global_id(0); + array[id] = 2 * id; + data[id] = array[id]; + + barrier(CLK_LOCAL_MEM_FENCE); + if(id == 0) + { + for(size_t i = 0; i < 10; i++) + data[id] += array[i]; + } +} +)CLC"; + +static const char* local_param_kernel = R"CLC( +__kernel void local_param_kernel(__local int* local_ptr, __global int* src, + __global int* dst) { + + size_t id = get_global_id(0); + + local_ptr[id] = src[id]; + barrier(CLK_GLOBAL_MEM_FENCE); + dst[id] = local_ptr[id]; + barrier(CLK_LOCAL_MEM_FENCE); + if(id == 9) + { + for(size_t i = 0; i < 10; i++) + dst[id] += local_ptr[i]; + } +} +)CLC"; + +static const char* local_param_local_memory_kernel = R"CLC( +__kernel void local_param_local_memory_kernel(__local int* local_ptr, + __global int* src, __global int* dst) { + + size_t id = get_global_id(0); + + __local int local_data[10]; + local_ptr[id] = src[id]; + + barrier(CLK_LOCAL_MEM_FENCE); + if(id / 2 == 0) { + for(size_t i = 0; i < 10; i++) + local_data[id] += local_ptr[i]; + } + else + local_data[id] = local_ptr[id] * 2; + + barrier(CLK_LOCAL_MEM_FENCE); + + dst[id] = local_data[id]; + barrier(CLK_LOCAL_MEM_FENCE); + if(id == 9) + { + for(size_t i = 0; i < 10; i++) + dst[id] += local_data[i]; + dst[id] += 666; + } +} +)CLC"; + +int test_kernel_local_memory_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + + // Check memory needed to execute empty kernel + if (create_single_kernel_helper(context, &program, &kernel, 1, + &empty_kernel, "empty_kernel") + != 0) + { + return TEST_FAIL; + } + + cl_ulong kernel_local_usage = 0; + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernel_local_usage), + &kernel_local_usage, NULL); + test_error(error, + "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); + + test_assert_error(kernel_local_usage >= 0, "kernel local mem size failed"); + + // Check memory needed to execute empty kernel with __local variable + if (create_single_kernel_helper(context, &program, &kernel, 1, + &local_memory_kernel, "local_memory_kernel") + != 0) + { + return TEST_FAIL; + } + + kernel_local_usage = 0; + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernel_local_usage), + &kernel_local_usage, NULL); + test_error(error, + "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); + + const size_t size = 10; + const size_t memory = size * sizeof(cl_int); + + size_t global_work_size[] = { size }; + + std::unique_ptr data(new int[size]); + for (size_t i = 0; i < size; i++) + { + data[i] = 0; + } + clMemWrapper streams[2]; + + streams[0] = + clCreateBuffer(context, CL_MEM_READ_WRITE, memory, NULL, &error); + test_error(error, "Creating test array failed"); + + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_work_size, + nullptr, 0, NULL, nullptr); + test_error(error, "clEnqueueNDRangeKernel failed."); + + error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, memory, + data.get(), 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + test_assert_error(kernel_local_usage >= memory, + "kernel local mem size failed"); + + // Check memory needed to execute empty kernel with __local parameter with + // setKernelArg + if (create_single_kernel_helper(context, &program, &kernel, 1, + &local_param_kernel, "local_param_kernel") + != 0) + { + return TEST_FAIL; + } + + kernel_local_usage = 0; + + for (size_t i = 0; i < size; i++) + { + data[i] = i; + } + + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, memory, + data.get(), &error); + test_error(error, "Creating test array failed"); + streams[1] = + clCreateBuffer(context, CL_MEM_READ_WRITE, memory, nullptr, &error); + test_error(error, "Creating test array failed"); + + error = clSetKernelArg(kernel, 0, memory, NULL); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_work_size, + nullptr, 0, NULL, nullptr); + test_error(error, "clEnqueueNDRangeKernel failed."); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, memory, + data.get(), 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernel_local_usage), + &kernel_local_usage, NULL); + test_error(error, + "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); + + test_assert_error(kernel_local_usage >= memory, + "kernel local mem size failed"); + + + // Check memory needed to execute kernel with __local variable and __local + // parameter with setKernelArg + if (create_single_kernel_helper(context, &program, &kernel, 1, + &local_param_local_memory_kernel, + "local_param_local_memory_kernel") + != 0) + { + return TEST_FAIL; + } + + kernel_local_usage = 0; + + for (size_t i = 0; i < size; i++) + { + data[i] = i; + } + + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, memory, + data.get(), &error); + test_error(error, "Creating test array failed"); + streams[1] = + clCreateBuffer(context, CL_MEM_READ_WRITE, memory, nullptr, &error); + test_error(error, "Creating test array failed"); + + error = clSetKernelArg(kernel, 0, memory, NULL); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_work_size, + nullptr, 0, NULL, nullptr); + test_error(error, "clEnqueueNDRangeKernel failed."); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, memory, + data.get(), 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernel_local_usage), + &kernel_local_usage, NULL); + test_error(error, + "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); + + test_assert_error(kernel_local_usage >= 2 * memory, + "kernel local mem size failed"); + + return CL_SUCCESS; +}