Skip to content

Commit

Permalink
[GPU] Fix softmax class optimized kernel (openvinotoolkit#22912)
Browse files Browse the repository at this point in the history
### Details:
- Disabled the incorrect use of blocked IO in cases where the pitch along the
softmax axis dimension is greater than 1

### Tickets:
 - openvinotoolkit#22851
  • Loading branch information
vladimir-paramuzov authored Feb 19, 2024
1 parent b6af3f6 commit bd2fce7
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ namespace kernel_selector {
// Number of work-items that cooperate on the item classes of a single output.
// 16 is currently the only supported value.
static const int workitems_per_classes = 16;

// Looks up the pitch of the tensor dimension selected by `dim`.
//
// tensor - tensor whose per-dimension pitch is queried
// dim    - softmax dimension (X, Y, Z, FEATURE or BATCH)
//
// Returns the pitch of the matching dimension; any other SoftmaxDim value
// yields 0.
inline static size_t get_class_pitch(const DataTensor& tensor, SoftmaxDim dim) {
    size_t class_pitch = 0;  // result for values of `dim` not handled below

    if (dim == SoftmaxDim::X) {
        class_pitch = tensor.X().pitch;
    } else if (dim == SoftmaxDim::Y) {
        class_pitch = tensor.Y().pitch;
    } else if (dim == SoftmaxDim::Z) {
        class_pitch = tensor.Z().pitch;
    } else if (dim == SoftmaxDim::FEATURE) {
        class_pitch = tensor.Feature().pitch;
    } else if (dim == SoftmaxDim::BATCH) {
        class_pitch = tensor.Batch().pitch;
    }

    return class_pitch;
}

inline static size_t GetItemClassCount(const DataTensor& input, SoftmaxDim dim) {
size_t item_class_count = 0;

Expand Down Expand Up @@ -109,10 +120,12 @@ KernelsPriority SoftmaxKerneItemsClassOptimized::GetKernelsPriority(const Params
JitConstants SoftmaxKerneItemsClassOptimized::GetJitConstants(const softmax_params& params, DispatchData dispatchData) const {
auto jit = SoftmaxItemsClassKernelBase::GetJitConstants(params, dispatchData);

// sub_group_block_write requires aligned memory,
// therefore it can be utilized if either memory is aligned by 16 bytes
bool isSubGroupBlockIOEnabled = params.dim != SoftmaxDim::BATCH &&
(dispatchData.dataSetSize * params.outputs[0].ElementSize()) % 16 == 0;
// sub_group_block_write requires
// 1. aligned memory, therefore it can be utilized if memory is aligned by 16 bytes
// 2. class dimension is innermost or all other dims equal to 1
bool isSubGroupBlockIOEnabled = get_class_pitch(params.outputs[0], params.dim) == 1 &&
get_class_pitch(params.inputs[0], params.dim) == 1 &&
(dispatchData.dataSetSize * params.outputs[0].ElementSize()) % 16 == 0;

jit.AddConstants({
MakeJitConstant("LEFTOVERS", dispatchData.leftovers),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ INSTANTIATE_TEST_SUITE_P(

const std::vector<ov::Shape> inputShapes3D = {
{16, 64, 64},
{1, 256, 256},
};

// Axis values for the 3D test cases: -1 and 1.
// These are two separate elements; written as `-1` followed by `-1, 1` with no
// comma in between, the initializer would parse as `-1 - 1, 1`, i.e. {-2, 1}.
const std::vector<int64_t> axis3D = {
    -1, 1
};

const auto params3D = testing::Combine(
Expand Down

0 comments on commit bd2fce7

Please sign in to comment.