diff --git a/src/gpuarrays.jl b/src/gpuarrays.jl
index 1cc3949..d15bac6 100644
--- a/src/gpuarrays.jl
+++ b/src/gpuarrays.jl
@@ -1,8 +1,5 @@
 # GPUArrays.jl interface
 
-import KernelAbstractions
-import KernelAbstractions: Backend
-
 #
 # Device functionality
 #
@@ -10,24 +7,6 @@ import KernelAbstractions: Backend
 
 ## execution
 
-@inline function GPUArrays.launch_heuristic(::oneAPIBackend, obj::O, args::Vararg{Any,N};
-                                            elements::Int, elements_per_thread::Int) where {O,N}
-    ndrange = ceil(Int, elements / elements_per_thread)
-    ndrange, workgroupsize, iterspace, dynamic = KA.launch_config(obj, ndrange,
-                                                                  nothing)
-
-    # this might not be the final context, since we may tune the workgroupsize
-    ctx = KA.mkcontext(obj, ndrange, iterspace)
-
-    kernel = @oneapi launch=false obj.f(ctx, args...)
-
-    items = launch_configuration(kernel)
-    # XXX: how many groups is a good number? the API doesn't tell us.
-    #      measured on a low-end IGP, 32 blocks seems like a good sweet spot.
-    #      note that this only matters for grid-stride kernels, like broadcast.
-    return (threads=items, blocks=32)
-end
-
 const GLOBAL_RNGs = Dict{ZeDevice,GPUArrays.RNG}()
 function GPUArrays.default_rng(::Type{<:oneArray})
     dev = device()