This repository has been archived by the owner on Jul 26, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathkroncommon.hpp
161 lines (127 loc) · 3.02 KB
/
kroncommon.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#ifndef KRONCOMMON_HPP
#define KRONCOMMON_HPP 1
// Portability macros: the same source can be compiled with or without CUDA.
// When USE_GPU is defined, the CUDA headers are pulled in and each macro
// expands to the corresponding CUDA keyword / intrinsic call.
#ifdef USE_GPU
#include <cuda.h>
#include <cuda_runtime.h>
// Function execution-space qualifier for kernels.
#define GLOBAL_FUNCTION __global__
// Expands to a call of __syncthreads(); callers add the trailing ';'.
#define SYNCTHREADS __syncthreads()
// Storage qualifier for shared-memory variables.
#define SHARED_MEMORY __shared__
// Function execution-space qualifiers for device / host code.
#define DEVICE_FUNCTION __device__
#define HOST_FUNCTION __host__
#else
// Without USE_GPU every macro expands to nothing, so annotated declarations
// become plain C++ and `SYNCTHREADS;` becomes an empty statement.
#define GLOBAL_FUNCTION
#define SYNCTHREADS
#define SHARED_MEMORY
#define DEVICE_FUNCTION
#define HOST_FUNCTION
#endif
#include <cassert>
#include <cstdlib>
#include <cmath>
#include <algorithm>
#include <vector>
// expect(cond): a thin layer over assert().
//
// - NDEBUG not defined: identical to assert(cond).
// - NDEBUG defined:     cond is cast to void instead of being removed, so
//   variables that appear only inside the check do not trigger
//   unused-variable warnings. Note that, unlike assert(), the expression is
//   still evaluated in this configuration.
#ifndef NDEBUG
#define expect(cond) assert(cond)
#else
#define expect(cond) ((void)(cond))
#endif
#ifndef USE_GPU
// Host-side replacement for atomicAdd on double, used when USE_GPU is not
// defined.
//
// Adds `dvalue` to the value stored at `p` and returns the value that was
// stored there before the addition. The read-modify-write is wrapped in an
// OpenMP `atomic capture` construct; if the compiler ignores the pragma
// (OpenMP disabled) the two statements simply run in sequence.
static inline
double atomicAdd(double volatile *p, double dvalue)
{
    double previous = 0;
    #pragma omp atomic capture
    {
        previous = *p;      // capture the old contents ...
        *p += dvalue;       // ... then accumulate in place
    }
    return previous;
}
// Host-side replacement for atomicAdd on float, used when USE_GPU is not
// defined.
//
// Adds `dvalue` to the value stored at `p` and returns the value that was
// stored there before the addition. The read-modify-write is wrapped in an
// OpenMP `atomic capture` construct; if the compiler ignores the pragma
// (OpenMP disabled) the two statements simply run in sequence.
static inline
float atomicAdd( float volatile *p, float dvalue)
{
    float previous = 0;
    #pragma omp atomic capture
    {
        previous = *p;      // capture the old contents ...
        *p += dvalue;       // ... then accumulate in place
    }
    return previous;
}
#endif
// indx2f(i,j,ld): 0-based linear offset of the 1-based entry (i,j) of a 2-D
// array whose leading dimension is ld, with the first index varying fastest.
// Algebraically this is (i-1) + (j-1)*ld.
#ifdef USE_LAMBDA
// Function form, selected when USE_LAMBDA is defined.
static inline
HOST_FUNCTION DEVICE_FUNCTION
int indx2f( int const i,
            int const j,
            int const ld )
{
    // (i-1) + (j-1)*ld, with both "-1" corrections folded into one term.
    int const correction = 1 + ld;
    return i + j*ld - correction;
}
#else
// Macro form, selected otherwise. Every argument is parenthesised, but `ld`
// is expanded twice, so it should not carry side effects.
#define indx2f(i,j,ld)  ( (i) + (j) * (ld) - (1 + (ld)) )
#endif
// 0-based linear offset of the 1-based entry (i1,i2,i3) of a 3-D array with
// extents n1 x n2 in its first two dimensions, first index varying fastest.
//
// Same value as the nested form ((i3-1)*n2 + (i2-1))*n1 - 1 + i1.
// NOTE(review): all arithmetic is done in `int`; confirm that n1*n2 and the
// resulting offset stay within int range for the array sizes in use.
static inline
HOST_FUNCTION DEVICE_FUNCTION
int indx3f( int const i1, int const i2, int const i3,
            int const n1, int const n2 )
{
    int const slab_size   = n1 * n2;               // elements per (i1,i2) slab
    int const within_slab = indx2f(i1, i2, n1);    // offset inside one slab
    return within_slab + (i3 - 1) * slab_size;
}
// 0-based linear offset of the 1-based entry (i1,i2,i3,i4) of a 4-D array
// with extents n1 x n2 x n3 in its first three dimensions, first index
// varying fastest.
//
// Same value as the nested form
//   (((i4-1)*n3 + (i3-1))*n2 + (i2-1))*n1 - 1 + i1.
// NOTE(review): all arithmetic is done in `int`; confirm that n1*n2*n3 and
// the resulting offset stay within int range for the array sizes in use.
static inline
HOST_FUNCTION DEVICE_FUNCTION
int indx4f( int const i1, int const i2, int const i3, int const i4,
            int const n1, int const n2, int const n3 )
{
    int const block_size   = (n1 * n2) * n3;                 // elements per 3-D block
    int const within_block = indx3f(i1, i2, i3, n1, n2);     // offset inside one block
    return within_block + (i4 - 1) * block_size;
}
// 0-based linear offset of the 1-based entry (i1,...,i5) of a 5-D array with
// extents n1 x n2 x n3 x n4 in its first four dimensions, first index
// varying fastest.
//
// Same value as the nested form
//   ((((i5-1)*n4 + (i4-1))*n3 + (i3-1))*n2 + (i2-1))*n1 - 1 + i1.
// NOTE(review): all arithmetic is done in `int`; confirm that n1*n2*n3*n4
// and the resulting offset stay within int range for the array sizes in use.
static inline
HOST_FUNCTION DEVICE_FUNCTION
int indx5f( int const i1, int const i2, int const i3, int const i4,
            int const i5,
            int const n1, int const n2, int const n3, int const n4 )
{
    int const block_size   = ((n1 * n2) * n3) * n4;                  // elements per 4-D block
    int const within_block = indx4f(i1, i2, i3, i4, n1, n2, n3);     // offset inside one block
    return within_block + (i5 - 1) * block_size;
}
// 0-based linear offset of the 1-based entry (i1,...,i6) of a 6-D array with
// extents n1 x n2 x n3 x n4 x n5 in its first five dimensions, first index
// varying fastest.
//
// Same value as the nested form
//   (((((i6-1)*n5 + (i5-1))*n4 + (i4-1))*n3 + (i3-1))*n2 + (i2-1))*n1 - 1 + i1.
// NOTE(review): all arithmetic is done in `int`; confirm that n1*n2*n3*n4*n5
// and the resulting offset stay within int range for the array sizes in use.
static inline
HOST_FUNCTION DEVICE_FUNCTION
int indx6f( int const i1, int const i2, int const i3, int const i4,
            int const i5, int const i6,
            int const n1, int const n2, int const n3, int const n4,
            int const n5 )
{
    int const block_size   = (((n1 * n2) * n3) * n4) * n5;               // elements per 5-D block
    int const within_block = indx5f(i1, i2, i3, i4, i5, n1, n2, n3, n4); // offset inside one block
    return within_block + (i6 - 1) * block_size;
}
#endif