Skip to content
This repository has been archived by the owner on Jan 13, 2025. It is now read-only.

Commit

Permalink
Refactor SPR/SPR2 index group_broadcast (#533)
Browse files Browse the repository at this point in the history
This patch replaces the two separate group_broadcast calls for the scalar row and column indices with a single group_broadcast of a two-element vector holding both values.
pgorlani authored Aug 12, 2024
1 parent fbce030 commit 8547059
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions src/operations/blas2/spr.hpp
Original file line number Diff line number Diff line change
@@ -101,20 +101,21 @@ typename rhs_1_t::value_t Spr<Single, isUpper, lhs_t, rhs_1_t, rhs_2_t>::eval(

index_t row = 0, col = 0;

#ifndef __ADAPTIVECPP__
#if (defined(INTEL_GPU) || defined(NVIDIA_GPU)) && not defined(__ADAPTIVECPP__)
if (!id) {
#endif
Spr<Single, isUpper, lhs_t, rhs_1_t, rhs_2_t>::compute_row_col(
global_idx, N_, row, col);
#ifndef __ADAPTIVECPP__
#if (defined(INTEL_GPU) || defined(NVIDIA_GPU)) && not defined(__ADAPTIVECPP__)
}

row = sycl::group_broadcast(ndItem.get_group(), row);
col = sycl::group_broadcast(ndItem.get_group(), col);
sycl::vec<index_t, 2> bcast_idxs{row, col};
bcast_idxs = sycl::group_broadcast(ndItem.get_group(), bcast_idxs);
row = bcast_idxs[0];
col = bcast_idxs[1];
#endif

if (global_idx < lhs_size) {
#ifndef __ADAPTIVECPP__
#if (defined(INTEL_GPU) || defined(NVIDIA_GPU)) && not defined(__ADAPTIVECPP__)
if constexpr (isUpper) {
if (id) {
row += id;

0 comments on commit 8547059

Please sign in to comment.