From 3b833f7ee120d6d663b67a32a30bb680b02004b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Scipione?= Date: Mon, 15 Jul 2024 03:07:15 -0700 Subject: [PATCH] Disable txsv intel gpu (#524) * Add runtime support check for discrete Intel GPUs on txsv operators * Enabling txsv operators for iGPU with DEFAULT TUNING TARGET Since the support for txsv is checked before calling the implementation, exception for Arc and GPU Max is raised before so it is possible to have a default configuration working on iGPUs. --------- Signed-off-by: nscipione --- include/blas_meta.h | 12 ++++++++++++ include/portblas_helper.h | 22 ++++++++++++++++++++++ src/interface/blas2/backend/default.hpp | 15 +++++++++------ src/interface/blas2_interface.hpp | 4 ++++ test/unittest/blas2/blas2_tbsv_test.cpp | 14 +++++++++----- test/unittest/blas2/blas2_tpsv_test.cpp | 13 ++++++++----- test/unittest/blas2/blas2_trsv_test.cpp | 12 ++++++++---- 7 files changed, 72 insertions(+), 20 deletions(-) diff --git a/include/blas_meta.h b/include/blas_meta.h index 28ceb8cf9..ba0fb4371 100644 --- a/include/blas_meta.h +++ b/include/blas_meta.h @@ -222,6 +222,18 @@ struct is_complex_std #endif +class unsupported_exception : public std::runtime_error { + public: + unsupported_exception(const std::string &operator_name) + : std::runtime_error(operator_name), _msg(operator_name) { + _msg += " operator currently not supported on selected device"; + }; + const char *what() const noexcept override { return _msg.c_str(); } + + private: + std::string _msg{}; +}; + } // namespace blas #endif // BLAS_META_H diff --git a/include/portblas_helper.h b/include/portblas_helper.h index 1d322fe19..535320868 100644 --- a/include/portblas_helper.h +++ b/include/portblas_helper.h @@ -228,6 +228,28 @@ inline bool is_malloc_shared(sb_handle_t &sb_handle, const containerT _rs) { } } +/* + @brief Check device and throw unsupported exception if Intel discrete GPU + @param sb_handle portBLAS handler + @param operator_name unsupported operator name + */ +template +inline void throw_unsupported_intel_dGPU(const sb_handle_t &sb_handle, + std::string &&operator_name) { + const auto device = sb_handle.get_queue().get_device(); + if (device.is_gpu()) { + const std::string vendor = + device.template get_info(); + if (vendor.find("Intel") != vendor.npos) { + const std::string name = + device.template get_info(); + if (name.find("Arc") != name.npos || name.find("GPU Max") != name.npos) { + throw unsupported_exception(operator_name); + } + } + } +} + } // end namespace helper } // end namespace blas #endif // PORTBLAS_HELPER_H diff --git a/src/interface/blas2/backend/default.hpp b/src/interface/blas2/backend/default.hpp index 77d159dd3..c63cde9ac 100644 --- a/src/interface/blas2/backend/default.hpp +++ b/src/interface/blas2/backend/default.hpp @@ -145,8 +145,9 @@ typename sb_handle_t::event_t _trsv( return blas::internal::_trsv_impl<32, 4, uplo, trn, diag>( sb_handle, _N, _mA, _lda, _vx, _incx, _dependencies); } else { - throw std::runtime_error( - "Trsv operator currently not supported on Intel GPUs"); + // This configuration works only for Intel iGPU + return blas::internal::_trsv_impl<8, 4, uplo, trn, diag>( + sb_handle, _N, _mA, _lda, _vx, _incx, _dependencies); } } else { return blas::internal::_trsv_impl<4, 2, uplo, trn, diag>( @@ -173,8 +174,9 @@ typename sb_handle_t::event_t _tbsv( return blas::internal::_tbsv_impl<32, 4, uplo, trn, diag>( sb_handle, _N, _K, _mA, _lda, _vx, _incx, _dependencies); } else { - throw std::runtime_error( - "Tbsv operator currently not supported on Intel GPUs"); + // This configuration works only for Intel iGPU + return blas::internal::_tbsv_impl<8, 4, uplo, trn, diag>( + sb_handle, _N, _K, _mA, _lda, _vx, _incx, _dependencies); } } else { return blas::internal::_tbsv_impl<4, 2, uplo, trn, diag>( @@ -200,8 +202,9 @@ typename sb_handle_t::event_t _tpsv( return blas::internal::_tpsv_impl<32, 4, uplo, trn, diag>( sb_handle, _N, _mA, _vx, _incx, _dependencies); } else { - throw std::runtime_error( - "Tpsv operator currently not supported on Intel GPUs"); + // This configuration works only for Intel iGPU + return blas::internal::_tpsv_impl<8, 4, uplo, trn, diag>( + sb_handle, _N, _mA, _vx, _incx, _dependencies); } } else { return blas::internal::_tpsv_impl<4, 2, uplo, trn, diag>( diff --git a/src/interface/blas2_interface.hpp b/src/interface/blas2_interface.hpp index 16cc32bc4..92e3df672 100644 --- a/src/interface/blas2_interface.hpp +++ b/src/interface/blas2_interface.hpp @@ -33,6 +33,7 @@ #include "operations/blas2_trees.h" #include "operations/blas_constants.h" #include "operations/blas_operators.hpp" +#include "portblas_helper.h" #include "sb_handle/portblas_handle.h" #include "views/view.h" #include @@ -1252,6 +1253,7 @@ typename sb_handle_t::event_t inline _trsv( sb_handle_t& sb_handle, char _Uplo, char _trans, char _Diag, index_t _N, container_t0 _mA, index_t _lda, container_t1 _vx, increment_t _incx, const typename sb_handle_t::event_t& _dependencies) { + helper::throw_unsupported_intel_dGPU(sb_handle, "trsv"); INST_UPLO_TRANS_DIAG(blas::trsv::backend::_trsv, sb_handle, _N, _mA, _lda, _vx, _incx, _dependencies) } @@ -1417,6 +1419,7 @@ typename sb_handle_t::event_t _tbsv( sb_handle_t& sb_handle, char _Uplo, char _trans, char _Diag, index_t _N, index_t _K, container_t0 _mA, index_t _lda, container_t1 _vx, increment_t _incx, const typename sb_handle_t::event_t& _dependencies) { + helper::throw_unsupported_intel_dGPU(sb_handle, "tbsv"); INST_UPLO_TRANS_DIAG(blas::tbsv::backend::_tbsv, sb_handle, _N, _K, _mA, _lda, _vx, _incx, _dependencies) } @@ -1437,6 +1440,7 @@ typename sb_handle_t::event_t _tpsv( sb_handle_t& sb_handle, char _Uplo, char _trans, char _Diag, index_t _N, container_t0 _mA, container_t1 _vx, increment_t _incx, const typename sb_handle_t::event_t& _dependencies) { + helper::throw_unsupported_intel_dGPU(sb_handle, "tpsv"); INST_UPLO_TRANS_DIAG(blas::tpsv::backend::_tpsv, sb_handle, _N, _mA, _vx, _incx, _dependencies) } diff --git a/test/unittest/blas2/blas2_tbsv_test.cpp b/test/unittest/blas2/blas2_tbsv_test.cpp index 9317caa94..e8380bf2c 100644 --- a/test/unittest/blas2/blas2_tbsv_test.cpp +++ b/test/unittest/blas2/blas2_tbsv_test.cpp @@ -83,11 +83,15 @@ void run_test(const combination_t combi) { auto copy_v = blas::helper::copy_to_device(q, x_v.data(), v_x_gpu, x_size); - // SYCL TBSV - auto tbsv_event = - _tbsv(sb_handle, *uplo_str, *t_str, *diag_str, n, k, m_a_gpu, - (k + 1) * lda_mul, v_x_gpu, incX, {copy_m, copy_v}); - sb_handle.wait(tbsv_event); + try { + // SYCL TBSV + auto tbsv_event = + _tbsv(sb_handle, *uplo_str, *t_str, *diag_str, n, k, m_a_gpu, + (k + 1) * lda_mul, v_x_gpu, incX, {copy_m, copy_v}); + sb_handle.wait(tbsv_event); + } catch (const blas::unsupported_exception& ue) { + GTEST_SKIP(); + } auto event = blas::helper::copy_to_host(sb_handle.get_queue(), v_x_gpu, x_v.data(), x_size); diff --git a/test/unittest/blas2/blas2_tpsv_test.cpp b/test/unittest/blas2/blas2_tpsv_test.cpp index 85e12c590..60d54a8df 100644 --- a/test/unittest/blas2/blas2_tpsv_test.cpp +++ b/test/unittest/blas2/blas2_tpsv_test.cpp @@ -93,12 +93,15 @@ void run_test(const combination_t combi) { auto copy_v = helper::copy_to_device(q, x_v.data(), v_x_gpu, x_size); - // SYCL TPSV - auto tpsv_event = _tpsv(sb_handle, *uplo_str, *t_str, - *diag_str, n, m_a_gpu, v_x_gpu, - incX, {copy_m, copy_v}); + try { + // SYCL TPSV + auto tpsv_event = _tpsv(sb_handle, *uplo_str, *t_str, *diag_str, n, m_a_gpu, + v_x_gpu, incX, {copy_m, copy_v}); - sb_handle.wait(tpsv_event); + sb_handle.wait(tpsv_event); + } catch (const blas::unsupported_exception& ue) { + GTEST_SKIP(); + } auto event = blas::helper::copy_to_host(sb_handle.get_queue(), v_x_gpu, x_v.data(), x_size); sb_handle.wait(event); diff --git a/test/unittest/blas2/blas2_trsv_test.cpp b/test/unittest/blas2/blas2_trsv_test.cpp index 9e5f2795d..cde471ae3 100644 --- a/test/unittest/blas2/blas2_trsv_test.cpp +++ b/test/unittest/blas2/blas2_trsv_test.cpp @@ -87,10 +87,14 @@ void run_test(const combination_t combi) { auto copy_v = blas::helper::copy_to_device(q, x_v.data(), v_x_gpu, x_size); - // SYCL TRSV - auto trsv_event = _trsv(sb_handle, *uplo_str, *t_str, *diag_str, n, m_a_gpu, - n * lda_mul, v_x_gpu, incX, {copy_m, copy_v}); - sb_handle.wait(trsv_event); + try { + // SYCL TRSV + auto trsv_event = _trsv(sb_handle, *uplo_str, *t_str, *diag_str, n, m_a_gpu, + n * lda_mul, v_x_gpu, incX, {copy_m, copy_v}); + sb_handle.wait(trsv_event); + } catch (const blas::unsupported_exception& ue) { + GTEST_SKIP(); + } auto event = blas::helper::copy_to_host(sb_handle.get_queue(), v_x_gpu, x_v.data(), x_size);