Skip to content

Commit

Permalink
Disable argument conversion when calling raw OpenCL kernels. (#224)
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt authored Sep 10, 2024
1 parent 3b60b84 commit a43ab12
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 158 deletions.
133 changes: 12 additions & 121 deletions lib/kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,131 +83,22 @@ function set_arg!(k::Kernel, idx::Integer, arg::LocalMem)
return k
end

function _contains_different_layout(::Type{T}) where T
sizeof(T) == 0 && return true
nfields(T) == 0 && return false
for fname in fieldnames(T)
contains_different_layout(fieldtype(T, fname)) && return true
end
return false
end

contains_different_layout(::Type{NTuple{3, T}}) where {T <: Union{Float32, Float64, Int8, Int32,
Int64, UInt8, UInt32, UInt64}} = true

"""
contains_different_layout(T)
Empty types and NTuple{3, CLNumber} have different layouts and need to be replaced
(Where `CLNumber <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64}`)
TODO: Float16 + Int16 should also be in CLNumbers
"""
@generated function contains_different_layout(::Type{T}) where T
:($(_contains_different_layout(T)))
end

function struct2tuple(x::T) where T
ntuple(nfields(x)) do i
getfield(x, i)
end
end

"""
replace_different_layout(x::T) where T
Replaces types with a layout different from OpenCL.
See [contains_different_layout(T)](@ref) for information what types those are!
"""
function replace_different_layout(x::T) where T
!contains_different_layout(T) && return x
if sizeof(x) === 0
return Int32(0) # zero size not possible in opencl
elseif nfields(x) == 0
replace_different_layout((), (x,))
elseif T <: Tuple
replace_different_layout((), x)
else
replace_different_layout((), struct2tuple(x))
end
end

replace_different_layout(red::NTuple{N, Any}, rest::Tuple{}) where N = red
function replace_different_layout(red::NTuple{N, Any}, rest) where N
elem1 = first(rest)
T = typeof(elem1)
repl = if sizeof(T) == 0 && nfields(elem1) == 0
Int32(0)
elseif contains_different_layout(T)
replace_different_layout(elem1)
else
elem1
end
replace_different_layout((red..., repl), Base.tail(rest))
end

# TODO UInt16/Float16?
# Handle different sizes of OpenCL Vec3, which doesn't agree with julia
function replace_different_layout(arg::NTuple{3, T}) where T <: Union{Float32, Float64, Int8, Int32, Int64, UInt8, UInt32, UInt64}
pad = T(0)
(arg..., pad)
end

function to_cl_ref(arg::T) where T
if !Base.datatype_pointerfree(T)
error("Types should not contain pointers: $T")
end
if contains_different_layout(T)
x = replace_different_layout(arg)
return Base.RefValue(x), sizeof(x)
end
Base.RefValue(arg), sizeof(arg)
end


Base.@pure datatype_align(x::T) where {T} = datatype_align(T)
Base.@pure function datatype_align(::Type{T}) where {T}
# typedef struct {
# uint32_t nfields;
# uint32_t alignment : 9;
# uint32_t haspadding : 1;
# uint32_t npointers : 20;
# uint32_t fielddesc_type : 2;
# } jl_datatype_layout_t;
field = T.layout + sizeof(UInt32)
unsafe_load(convert(Ptr{UInt16}, field)) & convert(Int16, 2^9-1)
end


function set_arg!(k::Kernel, idx::Integer, arg::T) where T
@assert idx > 0 "Kernel idx must be bigger 0"
ref, tsize = to_cl_ref(arg)
ref = Ref(arg)
tsize = sizeof(ref)
err = unchecked_clSetKernelArg(k, cl_uint(idx - 1), tsize, ref)
if err == CL_INVALID_ARG_SIZE
error("""
Julia and OpenCL type don't match at kernel argument $idx: Found $T.
Please make sure to define OpenCL structs correctly!
You should be generally fine by using `__attribute__((packed))`, but sometimes the alignment of fields is different from Julia.
Consider the following example:
```
//packed
// Tuple{NTuple{3, Float32}, Nothing, Float32}
struct __attribute__((packed)) Test{
float3 f1;
int f2; // empty type gets replaced with Int32 (no empty types allowed in OpenCL)
// you might need to define the alignement of fields to match julia's layout
float f3; // for the types used here the alignement matches though!
};
// this is a case where Julia and OpenCL packed alignment would differ, so we need to specify it explicitely
// Tuple{Int64, Int32}
struct __attribute__((packed)) Test2{
long f1;
int __attribute__((aligned (8))) f2; // opencl would align this to 4 in packed layout, while Julia uses 8!
};
```
You can use `c.datatype_align(T)` to figure out the alignment of a Julia type!
""")
end
if err != CL_SUCCESS
error("""Mismatch between Julia and OpenCL type for kernel argument $idx.
Possible reasons:
- OpenCL does not support empty types.
- Vectors of length 3 (e.g., `float3`) are packed as 4-element vectors;
consider padding your tuples.
- The alignment of fields in your struct may not match the OpenCL layout.
Make sure your Julia definition matches the OpenCL layout, e.g., by
using `__attribute__((packed))` in your OpenCL struct definition.""")
elseif err != CL_SUCCESS
throw(CLError(err))
end
return k
Expand Down
53 changes: 16 additions & 37 deletions test/kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,48 +145,27 @@
@test r == [1f0, 4f0]
end

@testset "empty types" begin
@testset "vector arguments" begin
test_source = "
//packed
struct __attribute__((packed)) Test{
float3 f1;
int f2; // empty type gets replaced with Int32 (no empty types allowed in OpenCL)
// you might need to define the alignement of fields to match julia's layout
float f3; // for the types used here the alignement matches though!
};
__kernel void structest(__global float *out, struct Test a){
out[0] = a.f1.x;
out[1] = a.f1.y;
out[2] = a.f1.z;
out[3] = a.f3;
__kernel void vec3_unpack(__global float *out, float3 a, float3 b) {
out[0] = a.x;
out[1] = a.y;
out[2] = a.z;
out[3] = b.x;
out[4] = b.y;
out[5] = b.z;
}
"

CLTestStruct = @eval(module $(gensym("KernelTest"))
struct CLTestStruct
f1::NTuple{3, Float32}
f2::Nothing
f3::Float32
end
end).CLTestStruct

prg = cl.Program(source = test_source)
cl.build!(prg)
structkernel = cl.Kernel(prg, "structest")
out = cl.Buffer(Float32, 4, :w)
astruct = CLTestStruct((1f0, 2f0, 3f0), nothing, 22f0)
cl.call(structkernel, out, astruct)
vec3kernel = cl.Kernel(prg, "vec3_unpack")
out = cl.Buffer(Float32, 6, :w)
# NOTE: the user is responsible for padding the vector to 4 elements
# (only on some platforms)
vec3_a = (1f0, 2f0, 3f0, 0f0)
vec3_b = (4f0, 5f0, 6f0, 0f0)
cl.call(vec3kernel, out, vec3_a, vec3_b)
r = cl.read(out)
@test r == [1f0, 2f0, 3f0, 22f0]
end

@testset "layout" begin
x = ((10f0, 1f0, 2f0), (10f0, 1f0, 2f0), (10f0, 1f0, 2f0))
clx = cl.replace_different_layout(x)

@test clx == ((10f0, 1f0, 2f0, 0f0), (10f0, 1f0, 2f0, 0f0), (10f0, 1f0, 2f0, 0f0))
x = (nothing, nothing, nothing)
clx = cl.replace_different_layout(x)
@test clx == 0 # TODO should it be like this?
@test r == [1f0, 2f0, 3f0, 4f0, 5f0, 6f0]
end
end

0 comments on commit a43ab12

Please sign in to comment.