Skip to content

Commit

Permalink
Try #1398:
Browse files Browse the repository at this point in the history
  • Loading branch information
bors[bot] authored Aug 1, 2023
2 parents f84433c + e8d4307 commit 5e27797
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 18 deletions.
78 changes: 63 additions & 15 deletions src/Operators/finitedifference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3420,7 +3420,6 @@ function strip_space(bc::StencilBroadcasted{Style}, parent_space) where {Style}
)
end


function Base.copyto!(
out::Field,
bc::Union{
Expand All @@ -3437,26 +3436,75 @@ function Base.copyto!(
Nq = 1
Nh = 1
end
bounds = window_bounds(space, bc)
# executed
@cuda threads = (Nq, Nq) blocks = (Nh,) copyto_stencil_kernel!(
strip_space(out, space),
strip_space(bc, space),
axes(out),
bounds,
)
(li, lw, rw, ri) = bounds = window_bounds(space, bc)
ninteriornodes = rw - lw + 1

max_threads = 256
nitemsbdy = Nq * Nq * Nh # # of independent boundary items
nitemsint = ninteriornodes * Nq * Nq * Nh # # of independent interior items
(nthreadsbdy, nblocksbdy) = Spaces._configure_threadblock(nitemsbdy)
(nthreadsint, nblocksint) = Spaces._configure_threadblock(nitemsint)
isnotperiodic = !Topologies.isperiodic(Spaces.vertical_topology(space))
strip_space_out = strip_space(out, space)
strip_space_bc = strip_space(bc, space)
args = (strip_space_out, strip_space_bc, axes(out), bounds, Nq, Nh)
# left window, if applicable
isnotperiodic &&
@cuda threads = (nthreadsbdy,) blocks = (nblocksbdy,) copyto_stencil_bdy_kernel!(
args...,
LeftBoundaryWindow{Spaces.left_boundary_name(space)}(),
)
# interior nodes
@cuda threads = (nthreadsint,) blocks = (nblocksint,) copyto_stencil_interior_kernel!(
args...,
ninteriornodes,
)
# right window, if applicable
isnotperiodic &&
@cuda threads = (nthreadsbdy,) blocks = (nblocksbdy,) copyto_stencil_bdy_kernel!(
args...,
RightBoundaryWindow{Spaces.right_boundary_name(space)}(),
)
return out
end

function copyto_stencil_kernel!(out, bc, space, bds)
i = threadIdx().x
j = threadIdx().y
h = blockIdx().x
hidx = (i, j, h)
apply_stencil!(space, out, bc, hidx, bds)
"""
    copyto_stencil_bdy_kernel!(out, bc, space, bds, Nq, Nh, bw)

CUDA kernel that evaluates the stencil broadcast `bc` on one boundary window
and stores the results in `out`.

Each thread with a linear index `gid` no larger than `Nq * Nq * Nh` handles
one horizontal index `hidx` and loops over the vertical indices of the window
selected by `bw`:

- `bw isa LeftBoundaryWindow`: indices `li:(lw - 1)`
- otherwise (right window): indices `(rw + 1):ri`

where `(li, lw, rw, ri) = bds`.

# Arguments
- `out`: destination written through `setidx!`.
- `bc`: broadcast expression read through `getidx`.
- `space`: first argument forwarded to `setidx!` / `getidx`.
- `bds`: 4-element collection `(li, lw, rw, ri)` of window bounds.
- `Nq`, `Nh`: extents used to size the horizontal index set `(Nq, Nq, Nh)`.
- `bw`: boundary-window tag forwarded to `getidx`.

Always returns `nothing`.
"""
function copyto_stencil_bdy_kernel!(out, bc, space, bds, Nq, Nh, bw)
    # Linear thread index across the whole launch grid.
    gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # The launch may contain more threads than items; surplus threads do nothing.
    if gid <= Nq * Nq * Nh
        (li, lw, rw, ri) = bds
        # NOTE(review): presumably maps the linear index to an (i, j, h)
        # horizontal index for the given extents; confirm against Spaces._get_idx.
        hidx = Spaces._get_idx((Nq, Nq, Nh), gid)
        if bw isa LeftBoundaryWindow
            # Nodes between the left end and the start of the interior window.
            @inbounds for idx in li:(lw - 1)
                setidx!(space, out, idx, hidx, getidx(space, bc, bw, idx, hidx))
            end
        else
            # Nodes between the end of the interior window and the right end.
            @inbounds for idx in (rw + 1):ri
                setidx!(space, out, idx, hidx, getidx(space, bc, bw, idx, hidx))
            end
        end
    end
    return nothing
end

"""
    copyto_stencil_interior_kernel!(out, bc, space, bds, Nq, Nh, nnodes)

CUDA kernel that evaluates the stencil broadcast `bc` on the interior window
and stores the results in `out`.

Each thread with a linear index `gid` no larger than `nnodes * Nq * Nq * Nh`
handles exactly one (vertical node, horizontal index) pair. The node offset
returned by `Spaces._get_idx` is shifted by `lw - 1`, where `lw` is the second
entry of `bds`, so that it addresses a node of the interior window.

# Arguments
- `out`: destination written through `setidx!`.
- `bc`: broadcast expression read through `getidx` with `Interior()`.
- `space`: first argument forwarded to `setidx!` / `getidx`.
- `bds`: 4-element collection of window bounds; only the second entry is used.
- `Nq`, `Nh`: extents used to size the horizontal index set.
- `nnodes`: number of interior nodes handled per horizontal index.

Always returns `nothing`.
"""
function copyto_stencil_interior_kernel!(out, bc, space, bds, Nq, Nh, nnodes)
    # Linear thread index across the whole launch grid.
    gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # The launch may contain more threads than items; surplus threads do nothing.
    if gid <= nnodes * Nq * Nq * Nh
        (_, lw, _, _) = bds
        (ndidx, i, j, h) = Spaces._get_idx((nnodes, Nq, Nq, Nh), gid)
        hidx = (i, j, h)
        # Shift the window-local node offset to an index in the interior window.
        ndidx += lw - 1
        setidx!(
            space,
            out,
            ndidx,
            hidx,
            getidx(space, bc, Interior(), ndidx, hidx),
        )
    end
    return nothing
end


function Base.copyto!(
field_out::Field,
bc::Union{
Expand Down
7 changes: 5 additions & 2 deletions src/Spaces/dss_cuda.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@

# Default cap on the number of threads per block used when configuring
# CUDA kernel launches.
function _max_threads_cuda()
    return 256
end

function _configure_threadblock(nitems)
nthreads = min(_max_threads_cuda(), nitems)
"""
    _configure_threadblock(max_threads, nitems)

Return `(nthreads, nblocks)` for processing `nitems` independent items with at
most `max_threads` threads per block: `nthreads` is the smaller of the two
inputs and `nblocks` is `nitems / nthreads` rounded up.
"""
function _configure_threadblock(max_threads, nitems)
    threads_per_block = min(nitems, max_threads)
    # Round up so that the final, possibly partial, block is included.
    return (threads_per_block, cld(nitems, threads_per_block))
end

# Convenience method: use the default per-block thread cap from
# `_max_threads_cuda()`.
function _configure_threadblock(nitems)
    return _configure_threadblock(_max_threads_cuda(), nitems)
end

function dss_load_perimeter_data!(
::ClimaComms.CUDADevice,
dss_buffer::DSSBuffer,
Expand Down
2 changes: 1 addition & 1 deletion test/MatrixFields/matrix_field_broadcasting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ function test_matrix_broadcast_against_array_reference(;
set_result!::F2,
get_temp_values::F3 = (_...) -> (),
ref_set_result!::F4,
time_ratio_limit = 1,
time_ratio_limit = 10,
max_eps_error_limit = 7,
test_broken_with_cuda = false,
) where {F1, F2, F3, F4}
Expand Down

0 comments on commit 5e27797

Please sign in to comment.