[CPU] enable brdgmm kernel in CPU plugin #27589

Merged 17 commits on Dec 9, 2024
13 changes: 9 additions & 4 deletions src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -343,6 +343,7 @@ const std::vector<impl_desc_type>& Convolution::getDefaultImplPriority() {
impl_desc_type::winograd_acl,
impl_desc_type::gemm_acl,
impl_desc_type::acl,
+impl_desc_type::brgconv_avx512_dw,
impl_desc_type::brgconv_avx512_amx_1x1,
impl_desc_type::brgconv_avx512_amx,
impl_desc_type::jit_avx512_amx_dw,
@@ -353,6 +354,7 @@ const std::vector<impl_desc_type>& Convolution::getDefaultImplPriority() {
impl_desc_type::jit_avx512_dw,
impl_desc_type::jit_avx512_1x1,
impl_desc_type::jit_avx512,
+impl_desc_type::brgconv_avx2_dw,
impl_desc_type::brgconv_avx2_1x1,
impl_desc_type::brgconv_avx2,
impl_desc_type::jit_uni_dw,
@@ -815,15 +817,19 @@ void Convolution::initSupportedPrimitiveDescriptors() {
#endif
for (size_t dIdx = 0; dIdx < descs.size(); dIdx++) {
auto& desc = descs[dIdx];
-auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get()));
+auto primitive_desc = desc.get(true);  // true means allow empty
+if (primitive_desc == nullptr) {
+continue;
+}
+auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(primitive_desc));

auto add_supported_desc = [&](dnnl::primitive_desc& desc) {
addSupportedPrimitiveDescriptor(desc);
descIdx.push_back(dIdx);
};

const bool first_match = customImplPriorities.empty();
DEBUG_LOG("#", getName(),
DEBUG_LOG("#", getName(), ",descIndex:", dIdx + 1, "/", descs.size(),
", itpd.impl_info_str(): ", desc.impl_info_str(),
", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())),
", first_match: ", first_match ? "true" : "false");
@@ -944,8 +950,7 @@ void Convolution::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const auto desc = createDescriptorInternal(getEngine(),
inDnnlDesc, weightDnnlDesc, biasDnnlDesc, outDnnlDesc, withBiases,
stride, dilation, paddingL, paddingR, alg, attr);
-if (desc)
-descs.emplace_back(desc);
+descs.emplace_back(desc);
}
}
}
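Note on the conv.cpp change above: createDescriptor() no longer filters out descriptors that oneDNN failed to build (the `if (desc)` guard is gone), so initSupportedPrimitiveDescriptors() now requests the raw handle with allow_empty and skips nulls itself. A minimal sketch of that pattern, assuming a plain vector of dnnl::primitive_desc (the node's actual descriptor wrapper is simplified away):

```cpp
// Sketch only: dnnl::primitive_desc::get(bool allow_empty) is the real oneDNN
// API; the loop condenses what initSupportedPrimitiveDescriptors() does with
// each collected descriptor.
#include <cstddef>
#include <vector>
#include <oneapi/dnnl/dnnl.hpp>

void collect_supported(const std::vector<dnnl::primitive_desc>& descs) {
    for (size_t dIdx = 0; dIdx < descs.size(); ++dIdx) {
        // get(true) means "allow empty": a null handle is returned instead of
        // an exception when no implementation exists for this configuration.
        dnnl_primitive_desc_t pd = descs[dIdx].get(/*allow_empty=*/true);
        if (pd == nullptr)
            continue;  // e.g. brdgmm rejected the shape/ISA combination
        // ... clone pd, iterate its implementations, register supported ones ...
    }
}
```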
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp
@@ -17,6 +17,7 @@ impl_desc_type parse_impl_name(std::string impl_desc_name) {
if (pos != std::string::npos) impl_desc_name.replace(pos, std::string(#_wrd).length(), #_sub); }
// Replace the ONEDNN pd name with OV definition.
REPLACE_WORD(brg_conv, brgconv);
+REPLACE_WORD(brdgmm, brgconv);
REPLACE_WORD(avx10_1_512, avx512);
REPLACE_WORD(brg_matmul, brgemm);

@@ -119,6 +120,8 @@ const char* impl_type_to_string(impl_desc_type type) {
CASE(brgconv_sse42_1x1);
CASE(brgconv_uni_1x1);
CASE(brgconv_avx512_amx_1x1);
+CASE(brgconv_avx512_dw);
+CASE(brgconv_avx2_dw);
CASE(brgemm_avx512);
CASE(brgemm_avx2);
CASE(brgemm_avx);
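For context, parse_impl_name() normalizes the implementation string oneDNN reports before tokenizing it; the new REPLACE_WORD(brdgmm, brgconv) line is what lets a brdgmm kernel surface as brgconv_*_dw. A standalone sketch of just the substitution step (the example input string is illustrative, not taken from the PR):

```cpp
// Sketch of the REPLACE_WORD normalization; the real function goes on to
// tokenize the result into impl_desc_type flags, which is omitted here.
#include <iostream>
#include <string>

std::string normalize_impl_name(std::string impl_desc_name) {
    auto replace_word = [&](const std::string& wrd, const std::string& sub) {
        const auto pos = impl_desc_name.find(wrd);
        if (pos != std::string::npos)
            impl_desc_name.replace(pos, wrd.length(), sub);
    };
    replace_word("brg_conv", "brgconv");
    replace_word("brdgmm", "brgconv");  // new: brdgmm kernels map onto brgconv_*
    replace_word("avx10_1_512", "avx512");
    replace_word("brg_matmul", "brgemm");
    return impl_desc_name;
}

int main() {
    // Hypothetical oneDNN name: after normalization it tokenizes toward
    // impl_desc_type::brgconv_avx512_dw.
    std::cout << normalize_impl_name("brdgmm:avx512") << "\n";  // brgconv:avx512
}
```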
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/onednn/iml_type_mapper.h
@@ -98,6 +98,9 @@ enum impl_desc_type : int64_t {
brgconv_uni_1x1 = brgconv | uni | _1x1,
brgconv_avx512_amx_1x1 = brgconv | avx512 | amx | _1x1,

+brgconv_avx2_dw = brgconv_avx2 | _dw,
+brgconv_avx512_dw = brgconv_avx512 | _dw,
+
brgemm_avx512 = brgemm | avx512,
brgemm_avx2 = brgemm | avx2,
brgemm_avx = brgemm | avx,
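The two new enum entries compose existing bits rather than introducing new ones; a minimal illustration of the scheme with placeholder bit positions (the real header's values differ):

```cpp
// Placeholder bit layout only; it demonstrates the OR-composition used by
// impl_desc_type, not the actual constants in iml_type_mapper.h.
#include <cstdint>

enum impl_bits : int64_t {
    brgconv = 1LL << 10,  // kernel family bit
    avx2    = 1LL << 3,   // ISA bits
    avx512  = 1LL << 4,
    _dw     = 1LL << 20,  // depthwise variant bit

    brgconv_avx2      = brgconv | avx2,
    brgconv_avx512    = brgconv | avx512,
    brgconv_avx2_dw   = brgconv_avx2 | _dw,    // the new depthwise entries
    brgconv_avx512_dw = brgconv_avx512 | _dw,
};

// Code can therefore test individual properties of an implementation type:
static_assert((brgconv_avx512_dw & _dw) != 0, "depthwise bit is set");
static_assert((brgconv_avx512_dw & brgconv) != 0, "brgconv family bit is set");
```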
@@ -5,6 +5,7 @@
#include "shared_test_classes/single_op/group_convolution.hpp"

#include "common_test_utils/node_builders/group_convolution.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "utils/convolution_params.hpp"
#include "utils/cpu_test_utils.hpp"
@@ -176,14 +177,15 @@ class GroupConvolutionLayerCPUTest : public testing::WithParamInterface<groupCon
std::tie(groupConvParams, netType, inType, outType, inputShape, targetDevice) = basicParamsSet;

init_input_shapes({inputShape});

-if (configuration.count(ov::hint::inference_precision.name()) &&
-ov::element::bf16 == configuration[ov::hint::inference_precision.name()].as<ov::element::Type>()) {
-selectedType += "_bf16";
-rel_threshold = 1e-2f;
-} else {
-selectedType = makeSelectedTypeStr(selectedType, netType);
+const auto& it = configuration.find(ov::hint::inference_precision.name());
+if (it != configuration.end()) {
+if (ov::element::bf16 == it->second.as<ov::element::Type>()) {
+rel_threshold = 1e-2f;
+} else if (ov::element::f16 == it->second.as<ov::element::Type>()) {
+rel_threshold = 0.00125f;
+}
+}
+selectedType = makeSelectedTypeStr(selectedType, deduce_expected_precision(netType, configuration));

// according to range propagation feature, resolution of generated inputs data for parameters moved from 32 to 32768
// 'real' part of input data was changed and some fails became visible for cases with Elu and FakeQuantize, so let's setup abs_threshold
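The rewritten block above keys the comparison tolerance off the ov::hint::inference_precision entry in the plugin config; a minimal standalone sketch of the same selection (thresholds copied from the diff, the f32 fallback is hypothetical):

```cpp
// Sketch: looser tolerance for narrower floating-point formats.
#include "openvino/core/type/element_type.hpp"

float pick_rel_threshold(const ov::element::Type& inference_precision) {
    if (inference_precision == ov::element::bf16)
        return 1e-2f;     // bf16: ~8 mantissa bits, loosest tolerance
    if (inference_precision == ov::element::f16)
        return 0.00125f;  // f16: 10 mantissa bits, tighter than bf16
    return 1e-4f;         // hypothetical default for full f32 inference
}
```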
@@ -289,6 +291,7 @@ std::vector<CPUSpecificParams> filterCPUInfoForDeviceSupportBF16(std::vector<CPU
}
return resParamsSet;
}

/* ===================== */

/* COMMON PARAMS */
@@ -313,6 +316,33 @@ const std::vector<fusingSpecificParams> fusingParamsSetBF16{emptyFusingSpec,
// sum
fusingSum};

const std::vector<fusingSpecificParams> fusingParamsSet_Brdgmm{emptyFusingSpec,
// eltwise
fusingRelu,
fusingPRelu1D,
// depthwise
fusingReluScaleShift,
// fake quantize
fusingFakeQuantizePerTensorRelu,
fusingFakeQuantizePerChannelRelu
// sum
// commented out due to MFDNN-12841
//fusingSumEluFQ,
//fusingSum
};

const std::vector<fusingSpecificParams> fusingParamsSetBF16_Brdgmm{emptyFusingSpec,
// eltwise
fusingRelu,
// depthwise
fusingReluScaleShift
// sum
// commented out due to MFDNN-12841
//fusingSum
};

const std::vector<fusingSpecificParams> fusingParamsSetFP16_Brdgmm = fusingParamsSetBF16_Brdgmm;

/* ============= GroupConvolution params (planar layout) ============= */
const std::vector<size_t> numOutChannels_Gemm = {6};
const std::vector<size_t> numGroups_Gemm = {2, 3};
@@ -1299,6 +1329,38 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_FP32,
::testing::Values(empty_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

const std::vector<std::vector<size_t>> dilations2d_Brdgmm = {{1, 1}};
const auto groupConvParams_ExplicitPadding_DW_2D_Brdgmm = ::testing::Combine(::testing::ValuesIn(kernels2d),
::testing::ValuesIn(strides2d),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d_Brdgmm),
::testing::ValuesIn(numOutChannels_DW),
::testing::ValuesIn(numGroups_DW),
::testing::Values(ov::op::PadType::EXPLICIT));
const auto BrdgmmCPUSpec = []() -> std::vector<CPUSpecificParams> {
std::string isaStr;
if (ov::with_cpu_x86_avx512f()) {
isaStr = "avx512";
} else {
isaStr = "avx2";
}
return {CPUSpecificParams{{}, {}, {}, "brgconv_" + isaStr + "_dw"}};
};
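The last CPUSpecificParams field is the primitive name the test framework later checks against the node the plugin actually executed (field layout assumed from CPUTestUtils), so BrdgmmCPUSpec() must match the host ISA at runtime. A toy sketch of that expectation check (the reported name would really come from the executed node's runtime info):

```cpp
// Toy check, not the framework's code: build the expected name exactly like
// BrdgmmCPUSpec() does, then compare with what the plugin reported.
#include <cassert>
#include <string>
#include "openvino/runtime/system_conf.hpp"

void expect_brdgmm_impl(const std::string& reported_impl_name) {
    const std::string isa = ov::with_cpu_x86_avx512f() ? "avx512" : "avx2";
    // On AVX-512 hosts the brdgmm kernel shows up as brgconv_avx512_dw,
    // on AVX2-only hosts as brgconv_avx2_dw (after parse_impl_name mapping).
    assert(reported_impl_name == "brgconv_" + isa + "_dw");
}
```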

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_FP32_Brdgmm,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D_Brdgmm,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes2dDW),
::testing::Values(ov::test::utils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(BrdgmmCPUSpec())),
::testing::ValuesIn(fusingParamsSet_Brdgmm),
::testing::Values(empty_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_BF16,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D,
@@ -1313,6 +1375,32 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_BF16,
::testing::Values(cpu_bf16_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_BF16_Brdgmm,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D_Brdgmm,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes2dDW),
::testing::Values(ov::test::utils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDeviceSupportBF16(BrdgmmCPUSpec())),
::testing::ValuesIn(fusingParamsSetBF16_Brdgmm),
::testing::Values(cpu_bf16_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_FP16_Brdgmm,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D_Brdgmm,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes2dDW),
::testing::Values(ov::test::utils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(BrdgmmCPUSpec())),
::testing::ValuesIn(fusingParamsSetFP16_Brdgmm),
::testing::Values(cpu_f16_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupConvolution (DW 3D) ============= */
const auto groupConvParams_ExplicitPadding_DW_3D = ::testing::Combine(::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
@@ -1349,6 +1437,30 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_DW_FP32,
::testing::ValuesIn(fusingParamsSet),
::testing::Values(empty_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

const std::vector<std::vector<size_t>> dilations3d_Brdgmm = {{1, 1, 1}};
const auto groupConvParams_ExplicitPadding_DW_3D_Brdgmm = ::testing::Combine(::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(padBegins3d),
::testing::ValuesIn(padEnds3d),
::testing::ValuesIn(dilations3d_Brdgmm),
::testing::ValuesIn(numOutChannels_DW),
::testing::ValuesIn(numGroups_DW),
::testing::Values(ov::op::PadType::EXPLICIT));

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_DW_FP32_Brdgmm,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_3D_Brdgmm,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes3dDW),
::testing::Values(ov::test::utils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(BrdgmmCPUSpec())),
::testing::ValuesIn(fusingParamsSet_Brdgmm),
::testing::Values(empty_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

/* ========= */

/* ============= SINGLE TEST CASES ============= */
@@ -40,10 +40,12 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*BinaryConvolutionLayerTest.*)",
// TODO: 53618. BF16 gemm ncsp convolution crash
R"(.*_GroupConv.*_inFmts=nc.*_primitive=jit_gemm.*ENFORCE_BF16=YES.*)",
// TODO: 53578. fork DW bf16 convolution does not support 3d cases yet
R"(.*_DW_GroupConv.*_inFmts=(ndhwc|nCdhw16c).*ENFORCE_BF16=YES.*)",
// TODO: 56143. Enable nspc convolutions for bf16 precision
R"(.*ConvolutionLayerCPUTest.*_inFmts=(ndhwc|nhwc).*INFERENCE_PRECISION_HINT=bf16.*)",
+// TODO: 157596 convolution bf16 leftover test case
+R"(smoke_JIT_AVX512_DW_GroupConv/GroupConvolutionLayerCPUTest.*ndhwc.*jit_avx512_dw.*INFERENCE_PRECISION_HINT=bf16.*)",
R"(smoke_Conv_1D_1x1_BF16/ConvolutionLayerCPUTest\.CompareWithRefs/IS=\[\]_TS=\(\((1|2)\.6(4|7)\.7\)_\)_K\(1\)_S\(1\)_PB\(0\)_PE\(0\)_D=\(1\)_O=63_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=jit_avx512_1x1_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)",
R"(smoke_Conv_1D_1x1_BF16/ConvolutionLayerCPUTest\.CompareWithRefs/IS=\[1\.\.200\.64\.\?\]_TS=\(\(2\.64\.7\)_\(1\.64\.5\)_\)_K\(1\)_S\(1\)_PB\(0\)_PE\(0\)_D=\(1\)_O=63_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=jit_avx512_1x1_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)",
R"(smoke_Conv_1D_1x1_BF16/ConvolutionLayerCPUTest\.CompareWithRefs/IS=\[\?\.6(4|7)\.1\.\.200\]_TS=\(\(2\.6(4|7)\.7\)_\(1\.6(4|7)\.9\)_\)_K\(1\)_S\(1\)_PB\(0\)_PE\(0\)_D=\(1\)_O=63_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=jit_avx512_1x1_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)",
R"(smoke_GroupConv_brgemm_2D_BF16/GroupConvolutionLayerCPUTest\.CompareWithRefs/IS=\[\]_TS=\(\(1\.64\.7\.7\)_\)_K\(3\.3\)_S\(2\.2\)_PB\((0|1)\.(0|1)\)_PE\(0\.0\)_D=\(2\.2\)_O=64_G=2_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=brgconv_avx512_amx_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)",
// TODO: 56827. Sporadic test failures
R"(.*smoke_Conv.+_FP32.ConvolutionLayerCPUTest\.CompareWithRefs.*TS=\(\(.\.67.+\).*inFmts=n.+c.*_primitive=jit_avx2.*)",
// incorrect jit_uni_planar_convolution with dilation = {1, 2, 1} and output channel 1
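Finally, the skip-list entries above are plain regexes matched against full test names; a hedged sketch of how such a list is typically applied (std::regex shown for illustration; the framework's actual matcher may differ):

```cpp
// Sketch: a test is skipped when its fully qualified name matches any pattern
// returned by disabledTestPatterns().
#include <regex>
#include <string>
#include <vector>

bool is_disabled(const std::string& test_name, const std::vector<std::string>& patterns) {
    for (const auto& p : patterns) {
        if (std::regex_search(test_name, std::regex(p)))
            return true;  // matches a known-failure pattern, e.g. a TODO ticket
    }
    return false;
}
```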