Skip to content

Commit

Permalink
updated benchmark scheduler enum names
Browse files Browse the repository at this point in the history
  • Loading branch information
FMarno committed Jan 14, 2025
1 parent 54fd338 commit dc2cea1
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 12 deletions.
16 changes: 9 additions & 7 deletions benchmarks/pvc/benchmarks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include "../benchmark_runner.hpp"
#include "gemm_configuration.hpp"

// Shorthand for the scheduler enum so the benchmark configurations in this
// file can name a scheduler as `Scheduler::<Enumerator>`.
using Scheduler = cutlass::gemm::device::Scheduler;

using MMAAtom = MMA_Atom<XE_8x16x16_F32BF16BF16F32_TT>;
using PvcGemmBF16BF16FP32_RRR_1 = cutlass::gemm::device::GemmConfiguration<
cutlass::arch::IntelPVC,
Expand All @@ -43,7 +45,7 @@ using PvcGemmBF16BF16FP32_RRR_1 = cutlass::gemm::device::GemmConfiguration<
float, Shape<_256, _256, _32>,
TiledMMA<MMAAtom, Layout<Shape<_8,_4,_1>>>,
XE_2D_U16x32x32_LD_N, XE_2D_U16x32x32_LD_V,
cutlass::gemm::device::Scheduler::Parallel>;
Scheduler::Gemm>;

using PvcGemmBF16BF16FP32_RRR_2 = cutlass::gemm::device::GemmConfiguration<
cutlass::arch::IntelPVC,
Expand All @@ -53,7 +55,7 @@ using PvcGemmBF16BF16FP32_RRR_2 = cutlass::gemm::device::GemmConfiguration<
float, Shape<_128, _512, _32>,
TiledMMA<MMAAtom, Layout<Shape<_4,_8,_1>>>,
XE_2D_U16x32x32_LD_N, XE_2D_U16x32x32_LD_V,
cutlass::gemm::device::Scheduler::Parallel>;
Scheduler::Gemm>;

using PvcGemmBF16BF16FP32_RRR_3 = cutlass::gemm::device::GemmConfiguration<
cutlass::arch::IntelPVC,
Expand All @@ -63,7 +65,7 @@ using PvcGemmBF16BF16FP32_RRR_3 = cutlass::gemm::device::GemmConfiguration<
float, Shape<_256, _128, _32>,
TiledMMA<MMAAtom, Layout<Shape<_8,_4,_1>>>,
XE_2D_U16x32x32_LD_N, XE_2D_U16x32x32_LD_V,
cutlass::gemm::device::Scheduler::Parallel>;
Scheduler::Gemm>;

using PvcGemmBF16BF16FP32_RRR_4 = cutlass::gemm::device::GemmConfiguration<
cutlass::arch::IntelPVC,
Expand All @@ -73,7 +75,7 @@ using PvcGemmBF16BF16FP32_RRR_4 = cutlass::gemm::device::GemmConfiguration<
float, Shape<_128, _256, _16>,
TiledMMA<MMAAtom, Layout<Shape<_4,_8,_1>>>,
XE_2D_U16x32x16_LD_N, XE_2D_U16x16x32_LD_V,
cutlass::gemm::device::Scheduler::Parallel>;
Scheduler::Gemm>;

using PvcGemmBF16BF16FP32_RRR_5 = cutlass::gemm::device::GemmConfiguration<
cutlass::arch::IntelPVC,
Expand All @@ -83,7 +85,7 @@ using PvcGemmBF16BF16FP32_RRR_5 = cutlass::gemm::device::GemmConfiguration<
float, Shape<_8, _128, _32>,
TiledMMA<MMAAtom, Layout<Shape<_1,_4,_1>>>,
XE_2D_U16x8x32_LD_N, XE_2D_U16x32x32_LD_V,
cutlass::gemm::device::Scheduler::Parallel>;
Scheduler::Gemm>;

CUTLASS_CREATE_GEMM_BENCHMARK(PvcGemmBF16BF16FP32_RRR_1);
CUTLASS_CREATE_GEMM_BENCHMARK(PvcGemmBF16BF16FP32_RRR_2);
Expand All @@ -99,7 +101,7 @@ using PvcGemmBF16BF16FP32_StreamK_RRR_1 = cutlass::gemm::device::GemmConfigurati
float, Shape<_256, _256, _32>,
TiledMMA<MMAAtom, Layout<Shape<_8,_4,_1>>>,
XE_2D_U16x32x32_LD_N, XE_2D_U16x32x32_LD_V,
cutlass::gemm::device::Scheduler::StreamK>;
Scheduler::GemmStreamK>;

CUTLASS_CREATE_GEMM_BENCHMARK(PvcGemmBF16BF16FP32_StreamK_RRR_1);

Expand All @@ -111,7 +113,7 @@ using PvcGemmBF16BF16FP32_SplitK_RRR_1 = cutlass::gemm::device::GemmConfiguratio
float, Shape<_256, _256, _32>,
TiledMMA<MMAAtom, Layout<Shape<_8,_4,_1>>>,
XE_2D_U16x32x32_LD_N, XE_2D_U16x32x32_LD_V,
cutlass::gemm::device::Scheduler::SplitK>;
Scheduler::GemmSplitK>;

CUTLASS_CREATE_GEMM_BENCHMARK(PvcGemmBF16BF16FP32_SplitK_RRR_1);

Expand Down
10 changes: 5 additions & 5 deletions benchmarks/pvc/gemm_configuration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ namespace cutlass {
namespace gemm {
namespace device {

enum class Scheduler { Parallel, SplitK, StreamK };
// Scheduling strategy selected through GemmConfiguration's TileScheduler
// parameter:
//   Gemm        - the kernel is instantiated with `void` as its scheduler type
//                 and defaultArguments() returns `{}`.
//   GemmSplitK  - the kernel is instantiated with cutlass::gemm::StreamKScheduler
//                 and defaultArguments() sets `scheduler` to
//                 {1, DecompositionMode::SplitK}.
//   GemmStreamK - the kernel is instantiated with cutlass::gemm::StreamKScheduler
//                 and defaultArguments() sets `scheduler` to
//                 {1, DecompositionMode::StreamK}.
enum class Scheduler { Gemm, GemmSplitK, GemmStreamK };

template<
class ArchTag,
Expand Down Expand Up @@ -115,22 +115,22 @@ struct GemmConfiguration<
Shape<int, int, int, int>,
CollectiveMainloop,
CollectiveEpilogue,
std::conditional_t<TileScheduler == Scheduler::Parallel, void, cutlass::gemm::StreamKScheduler>
std::conditional_t<TileScheduler == Scheduler::Gemm, void, cutlass::gemm::StreamKScheduler>
>;

using Gemm = GemmUniversalAdapter<GemmKernel>;

constexpr static typename GemmKernel::Arguments defaultArguments() {
using StreamKMode =
cutlass::gemm::kernel::detail::PersistentTileSchedulerXeStreamKParams::DecompositionMode;
if constexpr (TileScheduler == Scheduler::Parallel) {
if constexpr (TileScheduler == Scheduler::Gemm) {
return {};
} else if constexpr (TileScheduler == Scheduler::StreamK) {
} else if constexpr (TileScheduler == Scheduler::GemmStreamK) {
typename GemmKernel::Arguments arguments{};
arguments.scheduler = {1, StreamKMode::StreamK};
return arguments;
} else {
static_assert(TileScheduler == Scheduler::SplitK);
static_assert(TileScheduler == Scheduler::GemmSplitK);
typename GemmKernel::Arguments arguments{};
arguments.scheduler = {1, StreamKMode::SplitK};
return arguments;
Expand Down

0 comments on commit dc2cea1

Please sign in to comment.