This repository has been archived by the owner on Jul 26, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathkronmult1_xbatched.hpp
68 lines (55 loc) · 1.99 KB
/
kronmult1_xbatched.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#ifndef KRONMULT1_XBATCHED_HPP
#define KRONMULT1_XBATCHED_HPP 1
#include "kroncommon.hpp"
#include "kronmult1.hpp"
// ====================================================================
// Batched driver for the one-dimensional Kronecker product
//
//     Y(:,k) = kron(A1(k)) * X(:,k),   k = 1 .. batchCount
//
// Every batch entry is forwarded to kronmult1() with nvec = 1.
// The result is left in Y; X and W may be overwritten as scratch space.
//
// Arguments
//   n          : size forwarded to kronmult1; each X_, Y_, W_ is n^1 long
//   Aarray_    : table of matrix pointers, conceptually
//                T * Aarray(ndim,batchCount) with ndim = 1
//   lda        : forwarded unchanged to kronmult1
//                (presumably the leading dimension of each A -- confirm
//                 against kronmult1.hpp)
//   pX_        : table of pointers to the inputs  (input, may be modified)
//   pY_        : table of pointers to the outputs (output)
//   pW_        : table of pointers to work space
//   batchCount : number of batch entries to process
// ====================================================================
template<typename T>
GLOBAL_FUNCTION
void kronmult1_xbatched(
                       int const n,
                       T const * const Aarray_[],
                       int const lda,
                       T* pX_[],
                       T* pY_[],
                       T* pW_[],
                       int const batchCount)
{
        // Only one Kronecker factor, and one vector per batch entry.
        int const ndim = 1;
        int const nvec = 1;

#ifdef USE_GPU
        // The batch is spread over the x-dimension of the grid only.
        expect( gridDim.y == 1 );
        expect( gridDim.z == 1 );

        // Batch numbers are 1-based (matlab convention): block b begins at
        // entry b+1 and then advances by the number of blocks in the grid.
        int const first_batch  = blockIdx.x + 1;
        int const batch_stride = gridDim.x;
#else
        // Host build: a single pass visits every entry in order.
        int const first_batch  = 1;
        int const batch_stride = 1;
#endif

        for (int ibatch = first_batch; ibatch <= batchCount; ibatch += batch_stride) {
                // 0-based slot in the X/Y/W pointer tables for this 1-based entry
                int const slot = ibatch - 1;

                T* const x_k = pX_[ slot ];
                T* const y_k = pY_[ slot ];
                T* const w_k = pW_[ slot ];

                // Aarray(1,ibatch): the single factor matrix of this entry
                T const * const a1_k = Aarray_[ indx2f(1, ibatch, ndim) ];

                kronmult1( n, nvec, a1_k, x_k, y_k, w_k, lda );
        }
}
#endif