perf.c
/* Simple performance tests to compare skinny mutexes, pthreads
 * mutexes and pthreads spinlocks.
 *
 * Compile with PERF_skinny, PERF_pthreads or PERF_spinlock defined
 * for what you want to measure.
 */
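/* As a rough sketch of how one variant might be built (the exact
 * compiler flags and link requirements are an assumption, not taken
 * from the project's build files):
 *
 *     cc -O2 -DPERF_pthreads perf.c -o perf -lpthread
 *
 * A PERF_skinny build would also need the skinny-mutex sources or
 * library on the command line.
 */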
#define _GNU_SOURCE
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#ifdef PERF_skinny
#include "skinny_mutex.h"
#else
#include <pthread.h>
#endif
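/* Map a generic mutex_* API onto whichever lock implementation was
 * selected at compile time, so the tests below are written only once. */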
#if defined(PERF_pthreads)
typedef pthread_mutex_t mutex_t;
static int mutex_init(mutex_t *mutex)
{
        return pthread_mutex_init(mutex, NULL);
}
#define mutex_destroy pthread_mutex_destroy
#define mutex_lock pthread_mutex_lock
#define mutex_unlock pthread_mutex_unlock
#elif defined(PERF_skinny)
typedef skinny_mutex_t mutex_t;
#define mutex_init skinny_mutex_init
#define mutex_destroy skinny_mutex_destroy
#define mutex_lock skinny_mutex_lock
#define mutex_unlock skinny_mutex_unlock
#elif defined(PERF_spinlock)
typedef pthread_spinlock_t mutex_t;
static int mutex_init(mutex_t *mutex)
{
        return pthread_spin_init(mutex, PTHREAD_PROCESS_PRIVATE);
}
#define mutex_destroy pthread_spin_destroy
#define mutex_lock pthread_spin_lock
#define mutex_unlock pthread_spin_unlock
#endif
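/* Results of a single test run: how many lock/unlock repetitions to
 * perform, and the measured start and stop timestamps in microseconds. */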
struct test_results {
        int reps;
        long long start;
        long long stop;
};
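/* Current wall-clock time in microseconds. */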
static long long now_usecs(void)
{
        struct timeval tv;

        assert(!gettimeofday(&tv, NULL));
        return (long long)tv.tv_sec * 1000000 + tv.tv_usec;
}
/* Simply acquiring and releasing a lock, without any contention. */
static void lock_unlock(struct test_results *res)
{
        mutex_t mutex;
        int i;

        assert(!mutex_init(&mutex));

        res->start = now_usecs();

        for (i = res->reps; i--;) {
                assert(!mutex_lock(&mutex));
                assert(!mutex_unlock(&mutex));
        }

        res->stop = now_usecs();

        assert(!mutex_destroy(&mutex));
}
/* Robustly measuring the performance of contended locks is not as
 * easy as it sounds. We can't simply have a few locks, and throw a
 * larger number of threads at them, acquiring and releasing
 * individual locks. This is because the lock types we are measuring
 * do not guarantee fair behaviour. So what you can easily get is one
 * thread that runs for a while, acquiring and releasing many times,
 * while other threads sit waiting on locks without managing to
 * acquire them. (This kind of thing is not a problem in real
 * applications because they actually do useful work while holding
 * locks, rather than acquiring and immediately releasing them.)
 *
 * So we need to reliably induce the interesting contention case:
 * Every time a thread releases a lock, some other waiting thread
 * acquires it and gets to run.
 *
 * We do this by having a set of locks arranged in a ring, with one
 * more lock than there are threads involved. Each thread holds a
 * lock, and also tries to acquire the next lock in the ring. When it
 * acquires the next lock, it drops the previous lock. Then it tries
 * to acquire the next next lock, and so on. The effect is that at
 * every moment, only one thread is able to acquire two locks and so
 * make progress; in doing so, it releases a lock allowing another
 * thread to make progress and then promptly gets blocked.
 */
#define CONTENTION_THREAD_COUNT 4
#define CONTENTION_MUTEX_COUNT (CONTENTION_THREAD_COUNT + 1)
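/* State shared between all threads taking part in the contention test. */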
struct contention_info {
        mutex_t mutexes[CONTENTION_MUTEX_COUNT];
        pthread_mutex_t ready_mutex;
        pthread_cond_t ready_cond;
        int ready_count;
        int thread_reps;
};
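/* Per-thread state for the contention test. */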
struct contention_thread_info {
        struct contention_info *info;
        pthread_mutex_t start_mutex;
        int thread_index;
        long long start;
        long long stop;
};
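/* Each contention thread locks its slot in the ring, signals that it
 * is ready, waits for the start signal, then repeatedly acquires the
 * next mutex in the ring and releases the one it already holds. */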
static void *contention_thread(void *v_thread_info)
{
        struct contention_thread_info *thread_info = v_thread_info;
        struct contention_info *info = thread_info->info;
        int i = thread_info->thread_index;
        int reps = info->thread_reps;
        int j;

        /* Lock our first mutex */
        assert(!mutex_lock(&info->mutexes[i]));

        /* Indicate that we are ready for the test. */
        assert(!pthread_mutex_lock(&info->ready_mutex));
        if (++info->ready_count == CONTENTION_THREAD_COUNT)
                assert(!pthread_cond_signal(&info->ready_cond));
        assert(!pthread_mutex_unlock(&info->ready_mutex));

        /* Line up to start */
        assert(!pthread_mutex_lock(&thread_info->start_mutex));
        assert(!pthread_mutex_unlock(&thread_info->start_mutex));

        thread_info->start = now_usecs();

        for (j = 1; j < reps; j++) {
                int next = (i + 1) % CONTENTION_MUTEX_COUNT;
                assert(!mutex_lock(&info->mutexes[next]));
                assert(!mutex_unlock(&info->mutexes[i]));
                i = next;
        }

        thread_info->stop = now_usecs();

        assert(!mutex_unlock(&info->mutexes[i]));
        return NULL;
}
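/* Run the contended test: launch CONTENTION_THREAD_COUNT threads on the
 * ring of mutexes, release them simultaneously, then report the earliest
 * start and latest stop timestamps across all threads. */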
static void contention(struct test_results *res)
{
        struct contention_info info;
        struct contention_thread_info thread_infos[CONTENTION_THREAD_COUNT];
        pthread_t threads[CONTENTION_THREAD_COUNT];
        int i;

        for (i = 0; i < CONTENTION_MUTEX_COUNT; i++)
                assert(!mutex_init(&info.mutexes[i]));

        assert(!pthread_mutex_init(&info.ready_mutex, NULL));
        assert(!pthread_cond_init(&info.ready_cond, NULL));
        info.ready_count = 0;
        info.thread_reps = res->reps / CONTENTION_THREAD_COUNT;

        for (i = 0; i < CONTENTION_THREAD_COUNT; i++) {
                thread_infos[i].info = &info;
                thread_infos[i].thread_index = i;
                assert(!pthread_mutex_init(&thread_infos[i].start_mutex, NULL));
                assert(!pthread_mutex_lock(&thread_infos[i].start_mutex));
                assert(!pthread_create(&threads[i], NULL,
                                       contention_thread, &thread_infos[i]));
        }

        assert(!pthread_mutex_lock(&info.ready_mutex));
        while (info.ready_count < CONTENTION_THREAD_COUNT)
                assert(!pthread_cond_wait(&info.ready_cond,
                                          &info.ready_mutex));
        assert(!pthread_mutex_unlock(&info.ready_mutex));

        for (i = 0; i < CONTENTION_THREAD_COUNT; i++)
                assert(!pthread_mutex_unlock(&thread_infos[i].start_mutex));

        for (i = 0; i < CONTENTION_THREAD_COUNT; i++) {
                assert(!pthread_join(threads[i], NULL));
                assert(!pthread_mutex_destroy(&thread_infos[i].start_mutex));
        }

        for (i = 0; i < CONTENTION_MUTEX_COUNT; i++)
                assert(!mutex_destroy(&info.mutexes[i]));

        assert(!pthread_mutex_destroy(&info.ready_mutex));
        assert(!pthread_cond_destroy(&info.ready_cond));

        res->start = thread_infos[0].start;
        res->stop = thread_infos[0].stop;
        for (i = 1; i < CONTENTION_THREAD_COUNT; i++) {
                if (thread_infos[i].start < res->start)
                        res->start = thread_infos[i].start;
                if (thread_infos[i].stop > res->stop)
                        res->stop = thread_infos[i].stop;
        }
}
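/* qsort comparator for the per-run timings. */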
static int cmp_long_long(const void *ap, const void *bp)
{
        long long a = *(long long *)ap;
        long long b = *(long long *)bp;

        if (a < b)
                return -1;
        else if (a > b)
                return 1;
        else
                return 0;
}
#define SETS 10
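/* Run a test SETS times and print the best and median cost per
 * repetition in nanoseconds (the raw timings are in microseconds,
 * hence the factor of 1000). */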
static void measure(void (*test)(struct test_results *res),
                    const char *name, int reps)
{
        struct test_results res;
        long long times[SETS];
        int i;

        printf("Measuring %s: ", name);
        fflush(stdout);

        res.reps = reps;

        for (i = 0; i < SETS; i++) {
                test(&res);
                times[i] = res.stop - res.start;
        }

        qsort(times, SETS, sizeof(long long), cmp_long_long);
        printf("best %dns, 50%%ile %dns\n", (int)(times[0] * 1000 / reps),
               (int)(times[SETS / 2] * 1000 / reps));
}
int main(void)
{
        measure(lock_unlock, "Locking and unlocking without contention",
                10000000);
        measure(contention, "Locking and unlocking with contention",
                100000);
        return 0;
}