Skip to content

Commit

Permalink
Add HF datalayouts
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed Oct 22, 2024
1 parent 96bb8f4 commit abab1a8
Show file tree
Hide file tree
Showing 38 changed files with 1,831 additions and 202 deletions.
25 changes: 25 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1601,6 +1601,20 @@ steps:
agents:
slurm_gpus: 1

# Float32 ρe baroclinic-wave example with CLIMACOMMS_DEVICE set to CUDA; the
# `HorizontalLayout` environment variable requests the IJHF horizontal
# datalayout (examples/hybrid/driver.jl reads it, defaulting to IJFH).
- label: ":computer: Float 32 3D sphere baroclinic wave (ρe) HF datalayout GPU"
key: "gpu_baroclinic_wave_rho_e_float32_hf"
command:
- "julia --color=yes --project=.buildkite examples/hybrid/driver.jl"
artifact_paths:
- "examples/hybrid/sphere/output/baroclinic_wave_rhoe_hf/Float32/*"
env:
TEST_NAME: "sphere/baroclinic_wave_rhoe_hf"
FLOAT_TYPE: "Float32"
HorizontalLayout: "IJHF"
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1

- label: ":computer: 3D Box limiters advection slotted spheres"
key: "cpu_box_advection_limiter_slotted_spheres"
command:
Expand Down Expand Up @@ -1870,6 +1884,17 @@ steps:
TEST_NAME: "sphere/baroclinic_wave_rhoe"
FLOAT_TYPE: "Float64"

# Float64 ρe baroclinic-wave example; the `HorizontalLayout` environment
# variable requests the IJHF horizontal datalayout
# (examples/hybrid/driver.jl reads it, defaulting to IJFH).
- label: ":computer: Float 64 3D sphere baroclinic wave (ρe) HF datalayout"
key: "cpu_baroclinic_wave_rho_e_float64_hf"
command:
- "julia --color=yes --project=.buildkite examples/hybrid/driver.jl"
artifact_paths:
- "examples/hybrid/sphere/output/baroclinic_wave_rhoe_hf/Float64/*"
env:
TEST_NAME: "sphere/baroclinic_wave_rhoe_hf"
FLOAT_TYPE: "Float64"
HorizontalLayout: "IJHF"

- label: ":computer: 3D sphere baroclinic wave (ρe)"
key: "cpu_baroclinic_wave_rho_e"
command:
Expand Down
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@ ClimaCore.jl Release Notes
main
-------

- Fixed world-age issue on Julia 1.11 issue [Julia#54780](https://github.com/JuliaLang/julia/issues/54780), PR [#2034](https://github.com/CliMA/ClimaCore.jl/pull/2034).
- We've added new datalayouts: `VIJHF`,`IJHF`,`IHF`,`VIHF`, to explore their performance compared to our existing datalayouts: `VIJFH`,`IJFH`,`IFH`,`VIFH`. PR [#2055](https://github.com/CliMA/ClimaCore.jl/pull/2055), PR [#2052](https://github.com/CliMA/ClimaCore.jl/pull/2052).
- We've refactored some modules to use less internals. PR [#2053](https://github.com/CliMA/ClimaCore.jl/pull/2053), PR [#2052](https://github.com/CliMA/ClimaCore.jl/pull/2052), [#2051](https://github.com/CliMA/ClimaCore.jl/pull/2051), [#2049](https://github.com/CliMA/ClimaCore.jl/pull/2049).
- Some work was done in an attempt to reduce specializations and compile time. PR [#2042](https://github.com/CliMA/ClimaCore.jl/pull/2042), [#2041](https://github.com/CliMA/ClimaCore.jl/pull/2041).

v0.14.19
-------

- Fixed world-age issue on Julia 1.11 issue [Julia#54780](https://github.com/JuliaLang/julia/issues/54780), PR [#2034](https://github.com/CliMA/ClimaCore.jl/pull/2034).

### ![][badge-🐛bugfix] Fix undefined behavior in `DataLayout`s

PR [#2034](https://github.com/CliMA/ClimaCore.jl/pull/2034) fixes some undefined
Expand Down
4 changes: 4 additions & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ DataLayouts.IFH
DataLayouts.IJFH
DataLayouts.VIFH
DataLayouts.VIJFH
DataLayouts.IHF
DataLayouts.IJHF
DataLayouts.VIHF
DataLayouts.VIJHF
```

## Geometry
Expand Down
6 changes: 4 additions & 2 deletions examples/common_spaces.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,15 @@ function make_horizontal_space(
mesh,
npoly,
context::ClimaComms.SingletonCommsContext,
HorizontalLayout = DataLayouts.IJFH,
)
quad = Quadratures.GLL{npoly + 1}()
if mesh isa Meshes.AbstractMesh1D
topology = Topologies.IntervalTopology(ClimaComms.device(context), mesh)
space = Spaces.SpectralElementSpace1D(topology, quad)
elseif mesh isa Meshes.AbstractMesh2D
topology = Topologies.Topology2D(context, mesh)
space = Spaces.SpectralElementSpace2D(topology, quad)
space = Spaces.SpectralElementSpace2D(topology, quad; HorizontalLayout)
end
return space
end
Expand All @@ -51,13 +52,14 @@ function make_horizontal_space(
mesh,
npoly,
comms_ctx::ClimaComms.MPICommsContext,
HorizontalLayout = DataLayouts.IJFH,
)
quad = Quadratures.GLL{npoly + 1}()
if mesh isa Meshes.AbstractMesh1D
error("Distributed mode does not work with 1D horizontal spaces.")
elseif mesh isa Meshes.AbstractMesh2D
topology = Topologies.Topology2D(comms_ctx, mesh)
space = Spaces.SpectralElementSpace2D(topology, quad)
space = Spaces.SpectralElementSpace2D(topology, quad; HorizontalLayout)
end
return space
end
Expand Down
12 changes: 11 additions & 1 deletion examples/hybrid/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ ClimaComms.@import_required_backends
import SciMLBase
const comms_ctx = ClimaComms.context()
is_distributed = comms_ctx isa ClimaComms.MPICommsContext
using ClimaCore: DataLayouts

using Logging

Expand Down Expand Up @@ -91,7 +92,16 @@ if haskey(ENV, "RESTART_FILE")
ᶠlocal_geometry = Fields.local_geometry_field(Y.f)
else
t_start = FT(0)
h_space = make_horizontal_space(horizontal_mesh, npoly, comms_ctx)
# Horizontal datalayouts selectable through the `HorizontalLayout` environment
# variable, keyed by the name accepted there. Built as a typed literal instead
# of an untyped `Dict()` filled entry by entry.
HorizontalLayouts = Dict(
    "IJFH" => DataLayouts.IJFH,
    "IJHF" => DataLayouts.IJHF,
)
# Default to "IJFH" when the variable is unset. An unknown name fails with a
# message listing the supported layouts rather than a bare `KeyError`.
HorizontalLayout = let layout_name = get(ENV, "HorizontalLayout", "IJFH")
    get(HorizontalLayouts, layout_name) do
        supported = join(sort!(collect(keys(HorizontalLayouts))), ", ")
        error(
            "Unsupported HorizontalLayout \"$layout_name\"; expected one of: $supported",
        )
    end
end
h_space = make_horizontal_space(
    horizontal_mesh,
    npoly,
    comms_ctx,
    HorizontalLayout,
)
center_space, face_space =
make_hybrid_spaces(h_space, z_max, z_elem; z_stretch)
ᶜlocal_geometry = Fields.local_geometry_field(center_space)
Expand Down
40 changes: 40 additions & 0 deletions examples/hybrid/sphere/baroclinic_wave_rhoe_hf.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
using ClimaCorePlots, Plots
using ClimaCore.DataLayouts

include("baroclinic_wave_utilities.jl")

# Toggle for the Rayleigh sponge terms used by `additional_cache` and
# `additional_tendency!` in this file; disabled for this configuration.
const sponge = false

# Variables required for driver.jl (modify as needed)
# NOTE(review): `R`, `FT`, `CTS` and `cubed_sphere_mesh` are not defined in this
# file; presumably they come from the include above or from driver.jl — confirm.
horizontal_mesh = cubed_sphere_mesh(; radius = R, h_elem = 4)
npoly = 4
z_max = FT(30e3)
z_elem = 10
# 60 * 60 * 24 * 10 = 864000; presumably seconds, i.e. 10 days — TODO confirm units
t_end = FT(60 * 60 * 24 * 10)
dt = FT(400)
# 60 * 60 * 24 = 86400; presumably one day in seconds — TODO confirm units
dt_save_to_sol = FT(60 * 60 * 24)
dt_save_to_disk = FT(0) # 0 means don't save to disk
ode_algorithm = CTS.SSP333
jacobian_flags = (; ∂ᶜ𝔼ₜ∂ᶠ𝕄_mode = :no_∂ᶜp∂ᶜK, ∂ᶠ𝕄ₜ∂ᶜρ_mode = :exact)

# Build the extra cache for this example: the hyperdiffusion cache (with
# `κ₄ = FT(2e17)`) merged with the Rayleigh sponge cache when `sponge` is
# `true`, or with an empty named tuple otherwise.
function additional_cache(ᶜlocal_geometry, ᶠlocal_geometry, dt)
    hyperdiffusion =
        hyperdiffusion_cache(ᶜlocal_geometry, ᶠlocal_geometry; κ₄ = FT(2e17))
    if sponge
        return merge(
            hyperdiffusion,
            rayleigh_sponge_cache(ᶜlocal_geometry, ᶠlocal_geometry, dt),
        )
    end
    return merge(hyperdiffusion, (;))
end
# Apply the extra tendencies for this example: hyperdiffusion always, and the
# Rayleigh sponge only when `sponge` is `true`. Returns the sponge call's
# result, or `false` when the sponge is disabled.
function additional_tendency!(Yₜ, Y, p, t)
    hyperdiffusion_tendency!(Yₜ, Y, p, t)
    return sponge ? rayleigh_sponge_tendency!(Yₜ, Y, p, t) : false
end

# Single-argument convenience method: forwards to the two-argument method,
# selecting the `Val(:ρe)` variant.
function center_initial_condition(local_geometry)
    return center_initial_condition(local_geometry, Val(:ρe))
end
# Log the norm of `ρe` for the first and last saved states of `sol`, then
# write an animation of the second `UVVector` component of `uₕ` to
# `v.mp4` inside `output_dir`.
function postprocessing(sol, output_dir)
    t_first, Y_first = sol.t[1], sol.u[1]
    t_last, Y_last = sol.t[end], sol.u[end]
    @info "L₂ norm of ρe at t = $(t_first): $(norm(Y_first.c.ρe))"
    @info "L₂ norm of ρe at t = $(t_last): $(norm(Y_last.c.ρe))"

    animation = Plots.@animate for Y in sol.u
        uv = Geometry.UVVector.(Y.c.uₕ)
        ᶜv = uv.components.data.:2
        Plots.plot(ᶜv, level = 3, clim = (-6, 6))
    end
    video_path = joinpath(output_dir, "v.mp4")
    return Plots.mp4(animation, video_path, fps = 5)
end
4 changes: 3 additions & 1 deletion ext/cuda/data_layouts.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@

import ClimaCore.DataLayouts: AbstractData
import ClimaCore.DataLayouts: FusedMultiBroadcast
import ClimaCore.DataLayouts: IJKFVH, IJFH, VIJFH, VIFH, IFH, IJF, IF, VF, DataF
import ClimaCore.DataLayouts:
IJKFVH, IJFH, IJHF, VIJFH, VIJHF, VIFH, VIHF, IFH, IHF, IJF, IF, VF, DataF
import ClimaCore.DataLayouts: IJFHStyle, VIJFHStyle, VFStyle, DataFStyle
import ClimaCore.DataLayouts: IJHFStyle, VIJHFStyle
import ClimaCore.DataLayouts: promote_parent_array_type
import ClimaCore.DataLayouts: parent_array_type
import ClimaCore.DataLayouts: isascalar
Expand Down
8 changes: 7 additions & 1 deletion ext/cuda/data_layouts_mapreduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ end
function mapreduce_cuda(
f,
op,
data::Union{DataLayouts.VF, DataLayouts.IJFH, DataLayouts.VIJFH};
data::Union{
DataLayouts.VF,
DataLayouts.IJFH,
DataLayouts.IJHF,
DataLayouts.VIJFH,
DataLayouts.VIJHF,
};
weighted_jacobian = OnesArray(parent(data)),
opargs...,
)
Expand Down
56 changes: 40 additions & 16 deletions ext/cuda/data_layouts_threadblock.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,33 @@ bounds to ensure that the result of
function is_valid_index end

##### VIJFH
# Thread/block configuration for VIJFH and VIJHF data.
#
# Reads `(Nij, _, _, Nv, Nh)` from `DataLayouts.universal_size(data)` and
# returns `(; threads = (Nv_thread, Nij, Nij), blocks = (Nv_blocks, Nh))`,
# where `Nv_thread * Nij * Nij` does not exceed `n_max_threads`.
@inline function partition(
    data::Union{DataLayouts.VIJFH, DataLayouts.VIJHF},
    n_max_threads::Integer,
)
    (Nij, _, _, Nv, Nh) = DataLayouts.universal_size(data)
    # As many `v` entries per block as fit alongside Nij * Nij, capped at Nv.
    Nv_thread = min(Int(fld(n_max_threads, Nij * Nij)), Nv)
    # Round up so that every `v` entry is covered by some block.
    Nv_blocks = cld(Nv, Nv_thread)
    @assert prod((Nv_thread, Nij, Nij)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nv_thread, Nij, Nij))),$n_max_threads)"
    return (; threads = (Nv_thread, Nij, Nij), blocks = (Nv_blocks, Nh))
end
# Universal (5-component) index of the calling thread for VIJFH and VIJHF data.
#
# The thread index is destructured as `(tv, i, j)` and the block index as
# `(bv, h)`; the result is `CartesianIndex((i, j, 1, v, h))`.
@inline function universal_index(::Union{DataLayouts.VIJFH, DataLayouts.VIJHF})
    (tv, i, j) = CUDA.threadIdx()
    (bv, h) = CUDA.blockIdx()
    # Offset the in-block position by the blocks that precede this one.
    v = tv + (bv - 1) * CUDA.blockDim().x
    return CartesianIndex((i, j, 1, v, h))
end
# For VIJFH and VIJHF data, an index is valid when its 4th component lies in
# `1:DataLayouts.get_Nv(us)`.
@inline is_valid_index(
    ::Union{DataLayouts.VIJFH, DataLayouts.VIJHF},
    I::CI5,
    us::UniversalSize,
) = 1 ≤ I[4] ≤ DataLayouts.get_Nv(us)

##### IJFH
@inline function partition(data::DataLayouts.IJFH, n_max_threads::Integer)
@inline function partition(
data::Union{DataLayouts.IJFH, DataLayouts.IJHF},
n_max_threads::Integer,
)
(Nij, _, _, _, Nh) = DataLayouts.universal_size(data)
Nh_thread = min(
Int(fld(n_max_threads, Nij * Nij)),
Expand All @@ -75,31 +84,40 @@ end
@assert prod((Nij, Nij)) n_max_threads "threads,n_max_threads=($(prod((Nij, Nij))),$n_max_threads)"
return (; threads = (Nij, Nij, Nh_thread), blocks = (Nh_blocks,))
end
# Universal (5-component) index of the calling thread for IJFH and IJHF data.
#
# The thread index is destructured as `(i, j, th)` and the block index as
# `(bh,)`; the result is `CartesianIndex((i, j, 1, 1, h))`.
@inline function universal_index(::Union{DataLayouts.IJFH, DataLayouts.IJHF})
    (i, j, th) = CUDA.threadIdx()
    (bh,) = CUDA.blockIdx()
    # Offset the in-block position by the blocks that precede this one.
    h = th + (bh - 1) * CUDA.blockDim().z
    return CartesianIndex((i, j, 1, 1, h))
end
# For IJFH and IJHF data, an index is valid when its 5th component lies in
# `1:DataLayouts.get_Nh(us)`.
@inline is_valid_index(
    ::Union{DataLayouts.IJFH, DataLayouts.IJHF},
    I::CI5,
    us::UniversalSize,
) = 1 ≤ I[5] ≤ DataLayouts.get_Nh(us)

##### IFH
# Thread/block configuration for IFH and IHF data.
#
# Reads `(Ni, _, _, _, Nh)` from `DataLayouts.universal_size(data)` and returns
# `(; threads = (Ni, Nh_thread), blocks = (Nh_blocks,))`, where
# `Ni * Nh_thread` does not exceed `n_max_threads`.
@inline function partition(
    data::Union{DataLayouts.IFH, DataLayouts.IHF},
    n_max_threads::Integer,
)
    (Ni, _, _, _, Nh) = DataLayouts.universal_size(data)
    # As many `h` entries per block as fit alongside Ni, capped at Nh.
    Nh_thread = min(Int(fld(n_max_threads, Ni)), Nh)
    # Round up so that every `h` entry is covered by some block.
    Nh_blocks = cld(Nh, Nh_thread)
    @assert prod((Ni, Nh_thread)) ≤ n_max_threads "threads,n_max_threads=($(prod((Ni, Nh_thread))),$n_max_threads)"
    return (; threads = (Ni, Nh_thread), blocks = (Nh_blocks,))
end
# Universal (5-component) index of the calling thread for IFH and IHF data.
#
# The thread index is destructured as `(i, th)` and the block index as
# `(bh,)`; the result is `CartesianIndex((i, 1, 1, 1, h))`.
@inline function universal_index(::Union{DataLayouts.IFH, DataLayouts.IHF})
    (i, th) = CUDA.threadIdx()
    (bh,) = CUDA.blockIdx()
    # Offset the in-block position by the blocks that precede this one.
    h = th + (bh - 1) * CUDA.blockDim().y
    return CartesianIndex((i, 1, 1, 1, h))
end
# For IFH and IHF data, an index is valid when its 5th component lies in
# `1:DataLayouts.get_Nh(us)`.
@inline is_valid_index(
    ::Union{DataLayouts.IFH, DataLayouts.IHF},
    I::CI5,
    us::UniversalSize,
) = 1 ≤ I[5] ≤ DataLayouts.get_Nh(us)

##### IJF
@inline function partition(data::DataLayouts.IJF, n_max_threads::Integer)
Expand All @@ -126,21 +144,27 @@ end
# For IF data every index is reported as valid (this method always returns `true`).
@inline is_valid_index(::DataLayouts.IF, I::CI5, us::UniversalSize) = true

##### VIFH
# Thread/block configuration for VIFH and VIHF data.
#
# Reads `(Ni, _, _, Nv, Nh)` from `DataLayouts.universal_size(data)` and
# returns `(; threads = (Nv_thread, Ni), blocks = (Nv_blocks, Nh))`, where
# `Nv_thread * Ni` does not exceed `n_max_threads`.
@inline function partition(
    data::Union{DataLayouts.VIFH, DataLayouts.VIHF},
    n_max_threads::Integer,
)
    (Ni, _, _, Nv, Nh) = DataLayouts.universal_size(data)
    # As many `v` entries per block as fit alongside Ni, capped at Nv.
    Nv_thread = min(Int(fld(n_max_threads, Ni)), Nv)
    # Round up so that every `v` entry is covered by some block.
    Nv_blocks = cld(Nv, Nv_thread)
    @assert prod((Nv_thread, Ni)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nv_thread, Ni))),$n_max_threads)"
    return (; threads = (Nv_thread, Ni), blocks = (Nv_blocks, Nh))
end
# Universal (5-component) index of the calling thread for VIFH and VIHF data.
#
# The thread index is destructured as `(tv, i)` and the block index as
# `(bv, h)`; the result is `CartesianIndex((i, 1, 1, v, h))`.
@inline function universal_index(::Union{DataLayouts.VIFH, DataLayouts.VIHF})
    (tv, i) = CUDA.threadIdx()
    (bv, h) = CUDA.blockIdx()
    # Offset the in-block position by the blocks that precede this one.
    v = tv + (bv - 1) * CUDA.blockDim().x
    return CartesianIndex((i, 1, 1, v, h))
end
# For VIFH and VIHF data, an index is valid when its 4th component lies in
# `1:DataLayouts.get_Nv(us)`.
@inline is_valid_index(
    ::Union{DataLayouts.VIFH, DataLayouts.VIHF},
    I::CI5,
    us::UniversalSize,
) = 1 ≤ I[4] ≤ DataLayouts.get_Nv(us)

##### VF
@inline function partition(data::DataLayouts.VF, n_max_threads::Integer)
Expand Down
Loading

0 comments on commit abab1a8

Please sign in to comment.