Skip to content

Commit

Permalink
adds cross compilation clauses for sve; SVE capability detection now …
Browse files Browse the repository at this point in the history
…using __ARM_FEATURE_SVE
  • Loading branch information
NishantPrabhuFujitsu committed Oct 29, 2024
1 parent 5179f7d commit 5bc4fae
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 38 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ bin/
doc/
docs/build_documentation/work_dir/
temp/
fj/
.repo/
CMakeLists.txt.user
docs/IE_PLUGIN_DG/html/
Expand Down
32 changes: 32 additions & 0 deletions cmake/developer_package/compile_flags/os_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,38 @@ macro(ov_arm_neon_fp16_optimization_flags flags)
endif()
endmacro()

#
# ov_arm_sve_optimization_flags(<output flags>)
#
macro(ov_arm_sve_optimization_flags flags)
if(OV_COMPILER_IS_INTEL_LLVM)
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# nothing to define; works out of box
elseif(ANDROID)
if(ANDROID_ABI STREQUAL "arm64-v8a")
set(${flags} -march=armv8-a+sve -Wno-unused-command-line-argument)
else()
message(WARNING "SVE is not supported on this Android ABI: ${ANDROID_ABI}")
endif()
else()
if(AARCH64)
set(${flags} -O2 -march=armv8-a+sve)
if(NOT CMAKE_CL_64)
list(APPEND ${flags} -ftree-vectorize)
endif()
# Check for SVE support
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set(${flags} ${${flags}} -march=armv8-a+sve)
else()
message(WARNING "SVE is not supported on this architecture.")
endif()
elseif(ARM)
message(WARNING "SVE is not supported on 32-bit ARM architectures.")
endif()
endif()
endmacro()

#
# ov_disable_all_warnings(<target1 [target2 target3 ...]>)
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set(_CPU_CHECK_ANY "true")
set(_CPU_CHECK_SSE42 "with_cpu_x86_sse42()")
set(_CPU_CHECK_AVX "with_cpu_x86_avx()")
set(_CPU_CHECK_NEON_FP16 "with_cpu_neon_fp16()")
set(_CPU_CHECK_SVE "with_cpu_sve()")
set(_CPU_CHECK_AVX2 "with_cpu_x86_avx2()")
set(_CPU_CHECK_AVX512F "with_cpu_x86_avx512f()")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
#

## list of available instruction sets
set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16)
set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16 SVE)

set(_ACCEPTED_ARCHS_ANY "^(ANY)$")
set(_ACCEPTED_ARCHS_SSE42 "^(ANY|SSE42)$")
set(_ACCEPTED_ARCHS_AVX "^(ANY|SSE42|AVX)$")
set(_ACCEPTED_ARCHS_AVX2 "^(ANY|SSE42|AVX|AVX2)$")
set(_ACCEPTED_ARCHS_AVX512F "^(ANY|SSE42|AVX|AVX2|AVX512F)$")
set(_ACCEPTED_ARCHS_NEON_FP16 "^(ANY|NEON_FP16)$")
set(_ACCEPTED_ARCHS_SVE "^(ANY|SVE)$")

## Arch specific definitions
set(_DEFINE_ANY "")
Expand All @@ -19,12 +20,14 @@ set(_DEFINE_AVX "HAVE_AVX" ${_DEFINE_SSE42})
set(_DEFINE_AVX2 "HAVE_AVX2" ${_DEFINE_AVX})
set(_DEFINE_AVX512F "HAVE_AVX512F" ${_DEFINE_AVX2})
set(_DEFINE_NEON_FP16 "HAVE_NEON_FP16" ${_DEFINE_ANY})
set(_DEFINE_SVE "HAVE_SVE" ${_DEFINE_SVE})

## Arch specific compile options
ov_avx512_optimization_flags(_FLAGS_AVX512F)
ov_avx2_optimization_flags (_FLAGS_AVX2)
ov_sse42_optimization_flags (_FLAGS_SSE42)
ov_arm_neon_fp16_optimization_flags(_FLAGS_NEON_FP16)
ov_arm_sve_optimization_flags(_FLAGS_SVE)
set(_FLAGS_AVX "") ## TBD is not defined for OV project yet
set(_FLAGS_ANY "") ##

Expand Down Expand Up @@ -185,6 +188,8 @@ endfunction()
function(_currently_requested_top_arch VAR)
if(ENABLE_NEON_FP16)
set(RES NEON_FP16)
elseif(ENABLE_SVE)
set(RES SVE)
elseif(ENABLE_AVX512F)
set(RES AVX512F)
elseif(ENABLE_AVX2)
Expand Down
2 changes: 2 additions & 0 deletions cmake/developer_package/features.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ ov_dependent_option (ENABLE_AVX512F "Enable AVX512 optimizations" ON "X86_64 OR

ov_dependent_option(ENABLE_NEON_FP16 "Enable ARM FP16 optimizations" ON "AARCH64" OFF)

ov_dependent_option(ENABLE_SVE "Enable SVE optimizations" ON "AARCH64" OFF)

# Type of build, we add this as an explicit option to default it to ON
get_property(BUILD_SHARED_LIBS_DEFAULT GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
ov_option (BUILD_SHARED_LIBS "Build as a shared library" ${BUILD_SHARED_LIBS_DEFAULT})
Expand Down
28 changes: 2 additions & 26 deletions src/plugins/intel_cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -277,30 +277,6 @@ target_include_directories(${TARGET_NAME} SYSTEM PRIVATE $<TARGET_PROPERTY:dnnl,
# is not (yet) needed.
target_include_directories(${TARGET_NAME} PRIVATE $<TARGET_PROPERTY:openvino::reference,INTERFACE_INCLUDE_DIRECTORIES>)

# Check if SVE is available for AARCH64; compile with that if yes.
if (ARM OR AARCH64)
execute_process(
COMMAND lscpu
OUTPUT_VARIABLE CPUINFO
OUTPUT_STRIP_TRAILING_WHITESPACE
)
string(FIND "${CPUINFO}" "Flags:" FLAGS_INDEX)
string(SUBSTRING "${CPUINFO}" ${FLAGS_INDEX} -1 CPU_FLAGS)
string(FIND "${CPU_FLAGS}" "sve" ISA_FOUND)

if (NOT ISA_FOUND EQUAL -1)
set(HAVE_SVE ON)
else()
set(HAVE_SVE OFF)
endif()

if (HAVE_SVE)
message(STATUS "[AARCH64] ISA SVE detected")
target_compile_options(${TARGET_NAME} PRIVATE "-march=armv8.4-a+sve")
add_definitions(-DHAVE_SVE)
endif()
endif()

# Cross compiled function
# TODO: The same for proposal, proposalONNX, topk
cross_compiled_file(${TARGET_NAME}
Expand All @@ -311,14 +287,14 @@ cross_compiled_file(${TARGET_NAME}
NAMESPACE ov::Extensions::Cpu::XARCH
)
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 NEON_FP16 ANY
ARCH AVX512F AVX2 NEON_FP16 SVE ANY
src/nodes/kernels/scaled_attn/softmax.cpp
API src/nodes/kernels/scaled_attn/softmax.hpp
NAME attn_softmax
NAMESPACE ov::Extensions::Cpu::XARCH
)
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 NEON_FP16 ANY
ARCH AVX512F AVX2 NEON_FP16 SVE ANY
src/nodes/kernels/scaled_attn/mha_single_token.cpp
API src/nodes/kernels/scaled_attn/mha_single_token.hpp
NAME mha_single_token
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "openvino/core/type/float16.hpp"

#if defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE)
#if defined(__ARM_FEATURE_SVE)
#include "arm_sve.h"
#endif
#include "arm_neon.h"
Expand Down Expand Up @@ -249,7 +249,7 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16);
#endif

#ifdef OPENVINO_ARCH_ARM64
#if defined(HAVE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#if defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
inline svfloat32_t exp_ps_sve(svbool_t& pg, svfloat32_t& src) {
// Constants
const auto log2_e = svdup_n_f32(1.4426950409f);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include "softmax_kernel.hpp"

#if defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE)
#if defined(__ARM_FEATURE_SVE)
# include <arm_sve.h>
#endif
# include <arm_neon.h>
Expand Down Expand Up @@ -62,7 +62,7 @@ void cvt_copy(TA* dst, TB* src, size_t n) {
mm256_uni_storeu_ps(dst + i, vb);
}
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#if defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
size_t vec_len_f32_sve = svcntw();
auto _dst = reinterpret_cast<float32_t*>(dst);
size_t inc = vec_len_f32_sve;
Expand Down Expand Up @@ -122,7 +122,7 @@ static void attn_acc_value(float* out, float weight, T* v, size_t S, float* scal
mm256_uni_storeu_ps(out + i, v_out);
}
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#if defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
size_t vec_len_f32_sve = svcntw();
auto _v = reinterpret_cast<float32_t*>(v);
svfloat32_t attn_w_vec_fp32 = svdup_n_f32(weight);
Expand Down Expand Up @@ -403,7 +403,7 @@ static float sum_q_head(T* a, size_t n) {
hsum(vsum0);
sum = _mm256_cvtss_f32(vsum0);
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#if defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
size_t vec_len_f32_sve = svcntw();
svfloat32_t sum0 = svdup_n_f32(0.0f);
svfloat32_t sum1 = svdup_n_f32(0.0f);
Expand Down Expand Up @@ -588,7 +588,7 @@ static float dot_product(TA* a, TB* b, size_t n, float* scale, float* zp, float*
sum = _mm256_cvtss_f32(vsum0);

#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#if defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
size_t vec_len_f32_sve = svcntw();
svbool_t pg = svptrue_b32();
svfloat32_t sum0 = svdup_n_f32(0.0f);
Expand Down Expand Up @@ -939,7 +939,7 @@ static void attn_reduce(T* dst, float* temp, size_t M, size_t S, size_t temp_str
mm256_uni_storeu_ps(dst + i, result_vec_fp32);
}
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#if defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
size_t vec_len_f32_sve = svcntw();
auto _dst = reinterpret_cast<float32_t*>(dst);
size_t inc = vec_len_f32_sve;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <vector>

#if defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE)
#if defined(__ARM_FEATURE_SVE)
#include "arm_sve.h"
#endif
#include "arm_neon.h"
Expand Down Expand Up @@ -659,7 +659,7 @@ inline void exp_reduce_sum(float* a, const float max, const size_t size, float&
hsum(v_sum);
sum = _mm256_cvtss_f32(v_sum);
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#if defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
svfloat32_t v_a;
svfloat32_t v_max = svdup_n_f32(max);
svfloat32_t v_sum = svdup_n_f32(0.0f);
Expand Down Expand Up @@ -805,7 +805,7 @@ inline void multiply_scalar(float* a, float* a_dst, const float val, const size_
i += (size - i);
}
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
#if defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
size_t vec_len_f32_sve = svcntw();
svfloat32_t v_scale = svdup_n_f32(val);
size_t inc = vec_len_f32_sve;
Expand Down

0 comments on commit 5bc4fae

Please sign in to comment.