[CLANG_FORMAT] Enable clang-format for TPP adaptation source code (#2…
EgorDuplensky authored Jan 17, 2025
1 parent 0848f86 commit 5e013ea
Showing 31 changed files with 464 additions and 396 deletions.
12 changes: 8 additions & 4 deletions .github/workflows/code_style.yml
@@ -24,9 +24,11 @@ jobs:
sudo apt update
sudo apt --assume-yes install clang-format-15
# Run cmake with -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT in order to enable codestyle check for ITT collector
# Run cmake with extra options to cover as much source code as possible:
# - -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT to enable codestyle check for ITT collector
# - -DENABLE_SNIPPETS_LIBXSMM_TPP to cover snippets TPP adaptation
- name: CMake configure
run: cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -B build
run: cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DENABLE_SNIPPETS_LIBXSMM_TPP=ON -B build

- name: Create code style diff
run: cmake --build build --target clang_format_fix_all -j8
@@ -54,9 +56,11 @@ jobs:
sudo apt update
sudo apt --assume-yes install binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu scons clang-format-15
# Run cmake with -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT in order to enable codestyle check for ITT collector
# Run cmake with extra options to cover as much source code as possible:
# - -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT to enable codestyle check for ITT collector
# - -DENABLE_SNIPPETS_LIBXSMM_TPP to cover snippets TPP adaptation
- name: CMake configure
run: cmake -DENABLE_CLANG_FORMAT=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DCMAKE_TOOLCHAIN_FILE=cmake/arm64.toolchain.cmake -B build_arm64
run: cmake -DENABLE_CLANG_FORMAT=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DENABLE_SNIPPETS_LIBXSMM_TPP=ON -DCMAKE_TOOLCHAIN_FILE=cmake/arm64.toolchain.cmake -B build_arm64

- name: Create code style diff
run: cmake --build build_arm64 --target clang_format_fix_all -j8
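For reference, the style check can be reproduced locally with the same commands the workflow runs (a minimal sketch, assuming clang-format-15 is installed on an x64 Linux host; the build directory name is arbitrary):

  cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DENABLE_SNIPPETS_LIBXSMM_TPP=ON -B build
  cmake --build build --target clang_format_fix_all -j8
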
46 changes: 23 additions & 23 deletions src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp
@@ -3,6 +3,7 @@
//

#include "jit_brgemm_emitter.hpp"

#include "emitters/snippets/x64/jit_snippets_emitters.hpp"
#include "transformations/tpp/x64/op/brgemm.hpp"

@@ -28,18 +29,15 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres
const auto& input_1_desc = expr->get_input_port_descriptor(1);
const auto& output_desc = expr->get_output_port_descriptor(0);

std::vector<size_t> leading_dimensions {brgemm_node->get_input_stride(0),
brgemm_node->get_input_stride(1),
brgemm_node->get_output_stride(0)};
std::vector<size_t> leading_dimensions{brgemm_node->get_input_stride(0),
brgemm_node->get_input_stride(1),
brgemm_node->get_output_stride(0)};

auto in_0_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(0));
auto in_1_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(1));
exec_dtype = in_0_prec == LIBXSMM_DATATYPE_I8 || in_0_prec == LIBXSMM_DATATYPE_U8 ?
LIBXSMM_DATATYPE_I32 :
LIBXSMM_DATATYPE_F32;
auto out_0_prec = exec_dtype == LIBXSMM_DATATYPE_I32 ?
LIBXSMM_DATATYPE_I32 :
LIBXSMM_DATATYPE_F32;
exec_dtype = in_0_prec == LIBXSMM_DATATYPE_I8 || in_0_prec == LIBXSMM_DATATYPE_U8 ? LIBXSMM_DATATYPE_I32
: LIBXSMM_DATATYPE_F32;
auto out_0_prec = exec_dtype == LIBXSMM_DATATYPE_I32 ? LIBXSMM_DATATYPE_I32 : LIBXSMM_DATATYPE_F32;

const auto beta = brgemm_node->get_beta();
OV_CPU_JIT_EMITTER_ASSERT(beta == 0 || beta == 1, "Detected unsupported beta value: " + std::to_string(beta));
@@ -54,18 +52,14 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres
const auto N = static_cast<libxsmm_blasint>(*subtensor_in1.rbegin());

const bool is_f32_gemm = in_0_prec == in_1_prec && in_0_prec == LIBXSMM_DATATYPE_F32;
const bool is_bf16_gemm = in_0_prec == in_1_prec && in_0_prec == LIBXSMM_DATATYPE_BF16;
const bool is_bf16_gemm = in_0_prec == in_1_prec && in_0_prec == LIBXSMM_DATATYPE_BF16;
const bool is_i8_gemm = in_0_prec == LIBXSMM_DATATYPE_U8 || in_0_prec == LIBXSMM_DATATYPE_I8;
OV_CPU_JIT_EMITTER_ASSERT(is_f32_gemm ||
(is_bf16_gemm && K % 2 == 0) ||
(is_i8_gemm && K % 4 == 0),
OV_CPU_JIT_EMITTER_ASSERT(is_f32_gemm || (is_bf16_gemm && K % 2 == 0) || (is_i8_gemm && K % 4 == 0),
"Unsupported parameter combination for kernel configuration");

m_compile_flags = is_f32_gemm ?
LIBXSMM_GEMM_FLAGS('N', 'N') :
LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N') |
LIBXSMM_GEMM_FLAG_NO_SETUP_TILECONFIG |
LIBXSMM_GEMM_FLAG_NO_RESET_TILECONFIG;
m_compile_flags = is_f32_gemm ? LIBXSMM_GEMM_FLAGS('N', 'N')
: LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N') |
LIBXSMM_GEMM_FLAG_NO_SETUP_TILECONFIG | LIBXSMM_GEMM_FLAG_NO_RESET_TILECONFIG;

if (beta == 0)
m_compile_flags |= LIBXSMM_GEMM_FLAG_BETA_0;
@@ -79,9 +73,15 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres
m_compile_flags |= LIBXSMM_GEMM_FLAG_B_UNSIGNED;
}

m_shape = libxsmm_create_gemm_shape(N, M, K,
io_strides[1], io_strides[0], io_strides[2],
in_1_prec, in_0_prec, out_0_prec,
m_shape = libxsmm_create_gemm_shape(N,
M,
K,
io_strides[1],
io_strides[0],
io_strides[2],
in_1_prec,
in_0_prec,
out_0_prec,
exec_dtype);
m_prefetching_flags = LIBXSMM_GEMM_PREFETCH_NONE;
}
@@ -91,7 +91,7 @@ std::set<std::vector<element::Type>> BrgemmTppEmitter::get_supported_precisions(
return {{element::f32, element::f32}};
}

void BrgemmTppEmitter::validate_arguments(const std::vector<size_t> &in, const std::vector<size_t> &out) const {
void BrgemmTppEmitter::validate_arguments(const std::vector<size_t>& in, const std::vector<size_t>& out) const {
OV_CPU_JIT_EMITTER_ASSERT(in.size() == 2, "Expects 2 input regs, got" + std::to_string(in.size()));
OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Expects 1 output reg, got" + std::to_string(out.size()));
}
@@ -100,7 +100,7 @@ const uintptr_t BrgemmTppEmitter::get_compiled_kernel_ptr() const {
return COMPILE_TPP_KERNEL(libxsmm_dispatch_gemm(m_shape, m_compile_flags, m_prefetching_flags));
}

void BrgemmTppEmitter::execute_brgemm_kernel(libxsmm_gemmfunction brg_kernel, void *in0, void *in1, void *out0) {
void BrgemmTppEmitter::execute_brgemm_kernel(libxsmm_gemmfunction brg_kernel, void* in0, void* in1, void* out0) {
libxsmm_gemm_param gemm_p;
gemm_p.a.primary = in1;
gemm_p.b.primary = in0;
src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.cpp
@@ -3,6 +3,7 @@
//

#include "jit_scalar_emitter.hpp"

#include "emitters/snippets/x64/jit_snippets_emitters.hpp"

using namespace Xbyak;
15 changes: 10 additions & 5 deletions src/plugins/intel_cpu/src/emitters/tpp/x64/jit_scalar_emitter.hpp
@@ -3,8 +3,8 @@
//

#pragma once
#include "snippets/lowered/expression.hpp"
#include "emitters/plugin/x64/jit_emitter.hpp"
#include "snippets/lowered/expression.hpp"

namespace ov {
namespace intel_cpu {
@@ -13,11 +13,16 @@ class ScalarTppEmitter : public jit_emitter {
ScalarTppEmitter(dnnl::impl::cpu::x64::jit_generator* h,
dnnl::impl::cpu::x64::cpu_isa_t isa,
const ov::snippets::lowered::ExpressionPtr& expr);
size_t get_inputs_num() const override {return 0;}
size_t aux_gprs_count() const override {return 1;}
size_t get_inputs_num() const override {
return 0;
}
size_t aux_gprs_count() const override {
return 1;
}

private:
void emit_impl(const std::vector<size_t>& in, const std::vector<size_t>& out) const override;
};

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
58 changes: 34 additions & 24 deletions src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp
@@ -3,50 +3,60 @@
//

#include "brgemm.hpp"

#include "snippets/itt.hpp"
#include "snippets/utils/utils.hpp"
#include "snippets/lowered/port_descriptor.hpp"
#include "snippets/utils/utils.hpp"
#include "utils/general_utils.h"


namespace ov {
namespace intel_cpu {
namespace tpp {
namespace op {

BrgemmTPP::BrgemmTPP(const Output<Node>& A, const Output<Node>& B,
const size_t offset_a, const size_t offset_b, const size_t offset_c,
std::vector<size_t> layout_a, std::vector<size_t> layout_b, std::vector<size_t> layout_c,
BrgemmTPP::BrgemmTPP(const Output<Node>& A,
const Output<Node>& B,
const size_t offset_a,
const size_t offset_b,
const size_t offset_c,
std::vector<size_t> layout_a,
std::vector<size_t> layout_b,
std::vector<size_t> layout_c,
const float beta)
: MemoryAccess(std::set<size_t>{0, 1}, std::set<size_t>{0}),
modifier::TensorProcessingPrimitive(),
Brgemm(A, B,
offset_a, offset_b, offset_c,
std::move(layout_a), std::move(layout_b), std::move(layout_c)) {
Brgemm(A, B, offset_a, offset_b, offset_c, std::move(layout_a), std::move(layout_b), std::move(layout_c)) {
set_beta(beta);
}

BrgemmTPP::BrgemmTPP(const Output<Node>& A, const Output<Node>& B,
const PortDescriptor& desc_a, const PortDescriptor& desc_b, const PortDescriptor& desc_c,
std::vector<size_t> layout_a, std::vector<size_t> layout_b, std::vector<size_t> layout_c,
BrgemmTPP::BrgemmTPP(const Output<Node>& A,
const Output<Node>& B,
const PortDescriptor& desc_a,
const PortDescriptor& desc_b,
const PortDescriptor& desc_c,
std::vector<size_t> layout_a,
std::vector<size_t> layout_b,
std::vector<size_t> layout_c,
const float beta)
: MemoryAccess(PortMap{{0, desc_a}, {1, desc_b}}, PortMap{{0, desc_c}}),
modifier::TensorProcessingPrimitive(),
Brgemm(A, B,
desc_a, desc_b, desc_c,
std::move(layout_a), std::move(layout_b), std::move(layout_c)) {
Brgemm(A, B, desc_a, desc_b, desc_c, std::move(layout_a), std::move(layout_b), std::move(layout_c)) {
set_beta(beta);
}

std::shared_ptr<Node> BrgemmTPP::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(BrgemmTPP_clone_with_new_inputs);
check_new_args_count(this, new_args);
return std::make_shared<BrgemmTPP>(new_args.at(0), new_args.at(1),
get_input_port_descriptor(0), get_input_port_descriptor(1), get_output_port_descriptor(0),
snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(0))->get_layout(),
snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(1))->get_layout(),
snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(output(0))->get_layout(),
m_beta);
return std::make_shared<BrgemmTPP>(
new_args.at(0),
new_args.at(1),
get_input_port_descriptor(0),
get_input_port_descriptor(1),
get_output_port_descriptor(0),
snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(0))->get_layout(),
snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input(1))->get_layout(),
snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(output(0))->get_layout(),
m_beta);
}

bool BrgemmTPP::visit_attributes(AttributeVisitor& visitor) {
@@ -55,7 +65,7 @@ bool BrgemmTPP::visit_attributes(AttributeVisitor& visitor) {
return Brgemm::visit_attributes(visitor);
}

} // namespace op
} // namespace tpp
} // namespace intel_cpu
} // namespace ov
} // namespace op
} // namespace tpp
} // namespace intel_cpu
} // namespace ov
40 changes: 27 additions & 13 deletions src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp
@@ -4,8 +4,8 @@

#pragma once

#include "transformations/snippets/x64/op/brgemm_cpu.hpp"
#include "modifiers.hpp"
#include "transformations/snippets/x64/op/brgemm_cpu.hpp"

namespace ov {
namespace intel_cpu {
@@ -22,28 +22,42 @@ class BrgemmTPP : virtual public modifier::TensorProcessingPrimitive, public sni
public:
OPENVINO_OP("Brgemm", "TppOpset", snippets::op::Brgemm);

BrgemmTPP(const Output<Node>& A, const Output<Node>& B,
size_t offset_a = 0, size_t offset_b = 0, size_t offset_c = 0,
std::vector<size_t> layout_a = {}, std::vector<size_t> layout_b = {}, std::vector<size_t> layout_c = {},
BrgemmTPP(const Output<Node>& A,
const Output<Node>& B,
size_t offset_a = 0,
size_t offset_b = 0,
size_t offset_c = 0,
std::vector<size_t> layout_a = {},
std::vector<size_t> layout_b = {},
std::vector<size_t> layout_c = {},
float beta = 1);
BrgemmTPP(const Output<Node>& A, const Output<Node>& B,
const PortDescriptor& desc_a, const PortDescriptor& desc_b, const PortDescriptor& desc_c,
std::vector<size_t> layout_a = {}, std::vector<size_t> layout_b = {}, std::vector<size_t> layout_c = {},
BrgemmTPP(const Output<Node>& A,
const Output<Node>& B,
const PortDescriptor& desc_a,
const PortDescriptor& desc_b,
const PortDescriptor& desc_c,
std::vector<size_t> layout_a = {},
std::vector<size_t> layout_b = {},
std::vector<size_t> layout_c = {},
float beta = 1);
BrgemmTPP() = default;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

bool visit_attributes(AttributeVisitor& visitor) override;

float get_beta() const { return m_beta; }
void set_beta(float beta) { m_beta = beta; }
float get_beta() const {
return m_beta;
}
void set_beta(float beta) {
m_beta = beta;
}

private:
float m_beta = 0.f;
};

} // namespace op
} // namespace tpp
} // namespace intel_cpu
} // namespace ov
} // namespace op
} // namespace tpp
} // namespace intel_cpu
} // namespace ov