forked from memcached/memcached
-
Notifications
You must be signed in to change notification settings - Fork 0
/
proxy_await.c
383 lines (335 loc) · 13.7 KB
/
proxy_await.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#include "proxy.h"
// State for a single in-flight mcp.await() call. Allocated as a Lua userdata
// in _mcplib_await() and shared (via await_ref) by every sub-IO it spawns.
typedef struct mcp_await_s {
    int pending; // number of sub-IO responses not yet returned
    int wait_for; // responses still needed before completing; 0 == wait for all
    int req_ref; // registry ref to the mcp.request object
    int argtable_ref; // need to hold refs to any potential hash selectors
    int restable_ref; // table of result objects
    int detail_ref; // reference to detail string.
    enum mcp_await_e type; // AWAIT_GOOD/ANY/OK/FIRST/FASTGOOD/BACKGROUND
    bool completed; // have we completed the parent coroutine or not
    bool logerr; // create log_req entries for error responses
    mcp_request_t *rq; // request being fanned out (borrowed; ref held via req_ref)
    mc_resp *resp; // the top level mc_resp to fill in (as if we were an iop)
    mcp_rcontext_t *rctx; // request context
} mcp_await_t;
// TODO (v2): mcplib_await_gc()
// - needs to handle cases where an await is created, but a rare error happens
// before it completes and the coroutine is killed. must check and free its
// references.
// local restable = mcp.await(request, pools, num_wait)
// NOTE: need to hold onto the pool objects since those hold backend
// references. Here we just keep a reference to the argument table.
// Shared implementation of mcp.await() / mcp.await_logerrors().
// Lua signature: local restable = mcp.await(request, pools[, num_wait[, type[, detail]]])
// Validates arguments, packs them into an mcp_await_t userdata, then yields
// with MCP_YIELD_AWAIT so mcplib_await_run_rctx() can dispatch the sub-IOs.
// logerr: when true, error responses generate log_req entries on return.
// Fix: corrected typo in the "invalid type argument" error message ("tp" -> "to").
static int _mcplib_await(lua_State *L, bool logerr) {
    mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
    luaL_checktype(L, 2, LUA_TTABLE);
    int n = 0; // length of table of pools
    int wait_for = 0; // 0 means wait for all responses
    enum mcp_await_e type = AWAIT_GOOD;
    int detail_ref = 0;

    // Count the pool table entries; each value must be an mcp.pool_proxy.
    lua_pushnil(L); // init table key
    while (lua_next(L, 2) != 0) {
        luaL_checkudata(L, -1, "mcp.pool_proxy");
        lua_pop(L, 1); // remove value, keep key.
        n++;
    }

    if (n <= 0) {
        proxy_lua_error(L, "mcp.await arguments must have at least one pool");
    }

    // Optional arguments are consumed from the top of the stack inward
    // (detail at 5, then type at 4, then wait count at 3) so each pop
    // removes the correct value.
    if (lua_isstring(L, 5)) {
        // pops the detail string.
        detail_ref = luaL_ref(L, LUA_REGISTRYINDEX);
    }

    if (lua_isnumber(L, 4)) {
        type = lua_tointeger(L, 4);
        lua_pop(L, 1);
        switch (type) {
            case AWAIT_GOOD:
            case AWAIT_ANY:
            case AWAIT_OK:
            case AWAIT_FIRST:
            case AWAIT_FASTGOOD:
            case AWAIT_BACKGROUND:
                break;
            default:
                proxy_lua_error(L, "invalid type argument to mcp.await");
        }
    }

    if (lua_isnumber(L, 3)) {
        wait_for = lua_tointeger(L, 3);
        lua_pop(L, 1);
        // Cannot wait for more responses than pools queried.
        if (wait_for > n) {
            wait_for = n;
        }
    }

    // FIRST is only looking for one valid request.
    if (type == AWAIT_FIRST) {
        wait_for = 1;
    }

    // TODO (v2): quickly loop table once and ensure they're all pools?
    // TODO (v2) in case of newuserdatauv throwing an error, we need to grab
    // these references after allocating *aw else can leak memory.
    int argtable_ref = luaL_ref(L, LUA_REGISTRYINDEX); // pops the arg table
    int req_ref = luaL_ref(L, LUA_REGISTRYINDEX); // pops request object.

    // stack will be only the await object now
    // allocate before grabbing references so an error won't cause leaks.
    mcp_await_t *aw = lua_newuserdatauv(L, sizeof(mcp_await_t), 0);
    memset(aw, 0, sizeof(mcp_await_t));
    // create result table
    lua_newtable(L); // -> 2
    aw->restable_ref = luaL_ref(L, LUA_REGISTRYINDEX); // pop the result table
    aw->wait_for = wait_for;
    aw->pending = n;
    aw->argtable_ref = argtable_ref;
    aw->rq = rq;
    aw->req_ref = req_ref;
    aw->detail_ref = detail_ref;
    aw->type = type;
    aw->logerr = logerr;

    P_DEBUG("%s: about to yield [len: %d]\n", __func__, n);
    lua_pushinteger(L, MCP_YIELD_AWAIT);

    return lua_yield(L, 2);
}
// default await, no logging.
// Lua entry point for mcp.await(): default await, no logging of error
// responses.
int mcplib_await(lua_State *L) {
    return _mcplib_await(L, false);
}
// Lua entry point for mcp.await_logerrors(): identical to mcp.await() but
// failed sub-responses also emit log_req entries (see mcplib_await_return).
int mcplib_await_logerrors(lua_State *L) {
    return _mcplib_await(L, true);
}
// TODO (v2): need to get this code running under pcall().
// It looks like a bulk of this code can move into mcplib_await(),
// and then here post-yield we can add the rcontext to the right
// places. Else these errors currently crash the daemon.
// Post-yield dispatch stage of mcp.await(): pops the mcp_await_t userdata
// left on the coroutine stack by _mcplib_await(), then walks the pool table
// and queues one sub-IO per pool against the hashed backend. For BACKGROUND
// mode an extra dummy IO is queued so completion can fire without waiting
// for real responses. Returns 0.
// NOTE(review): runs unprotected (no pcall); proxy_lua_error() here will
// longjmp with no handler — see the TODO above this function.
int mcplib_await_run_rctx(mcp_rcontext_t *rctx) {
    P_DEBUG("%s: start\n", __func__);
    conn *c = rctx->c;
    lua_State *L = rctx->Lc;
    WSTAT_INCR(c->thread, proxy_await_active, 1);
    // The await userdata is on top of the coroutine stack after the yield.
    mcp_await_t *aw = lua_touserdata(L, -1);
    assert(aw != NULL);
    int await_ref = luaL_ref(L, LUA_REGISTRYINDEX); // await is popped.
    lua_rawgeti(L, LUA_REGISTRYINDEX, aw->argtable_ref); // -> 1
    mcp_request_t *rq = aw->rq;
    aw->rctx = rctx;
    // prepare the request key
    const char *key = MCP_PARSER_KEY(rq->pr);
    size_t len = rq->pr.klen;
    // TODO (v3) await_first is used as a marker for upping the "wait for
    // IO's" queue count, which means we need to force it off if we're in
    // background mode, else we would accidentally wait for a response anyway.
    // This note is for finding a less convoluted method for this.
    bool await_first = (aw->type == AWAIT_BACKGROUND) ? false : true;
    // loop arg table and run each pool backend selector
    lua_pushnil(L); // -> 3
    while (lua_next(L, 1) != 0) {
        P_DEBUG("%s: top of loop\n", __func__);
        // (key, -2), (val, -1)
        // skip the metatable checking here as we already check this in
        // mcp.await()'s top level call.
        mcp_pool_proxy_t *pp = lua_touserdata(L, -1);
        if (pp == NULL) {
            proxy_lua_error(L, "mcp.await must be supplied with a pool");
        }
        // NOTE: rq->be is only held to help pass the backend into the IOP in
        // mcp_queue call. Could be a local variable and an argument too.
        mcp_backend_t *be = mcplib_pool_proxy_call_helper(pp, key, len);
        if (be == NULL) {
            proxy_lua_error(L, "key dist hasher tried to use out of bounds index");
        }
        // mcp_prep_resobj pushes an mcp.response userdata onto the stack.
        mcp_resp_t *res = mcp_prep_resobj(L, rq, be, rctx->c->thread);
        io_pending_proxy_t *p = mcp_queue_rctx_io(rctx, rq, be, res);
        if (p == NULL) {
            // TODO: need to unroll this. _gc func?
            // NOTE(review): p is dereferenced unconditionally below; a NULL
            // return here would crash. Needs the unroll path before this can
            // be considered safe.
        }
        rctx->async_pending++;
        p->is_await = true;
        p->await_ref = await_ref;
        p->await_first = await_first;
        // io_p needs to hold onto its own response reference, because we may or
        // may not include it in the final await() result.
        p->mcpres_ref = luaL_ref(L, LUA_REGISTRYINDEX); // pops mcp.response
        await_first = false;
        // pop value, keep key.
        lua_pop(L, 1);
    }

    // Background mode: queue a dummy IO with no request/backend/response.
    // Its return immediately completes the await (see mcplib_await_return),
    // and wait_for is zeroed so real responses are never waited on.
    if (aw->type == AWAIT_BACKGROUND) {
        io_pending_proxy_t *p = mcp_queue_rctx_io(rctx, NULL, NULL, NULL);
        p->is_await = true;
        p->await_ref = await_ref;
        p->await_background = true;
        rctx->async_pending++;
        aw->pending++;
        aw->wait_for = 0;
    }

    lua_pop(L, 1); // remove table key.
    P_DEBUG("%s: end\n", __func__);

    return 0;
}
// NOTE: This is unprotected lua/C code. There are no lua-style errors thrown
// purposefully as of this writing, but it's still not safe. Either the code
// can be restructured to use less lua (which I think is better long term
// anyway) or it can be pushed behind a cfunc pcall so we don't crash the
// daemon if something bad happens.
// Called once per returning sub-IO of an await. Decides three things:
// - valid:      should this response be added to the result table?
// - completing: has the await been satisfied, so the parent coroutine can
//               resume with the result table?
// - cleanup:    was this the last pending sub-IO, so shared refs can be freed?
// Completion and cleanup are independent: the coroutine may resume early
// (wait_for satisfied) while stragglers are still pending; cleanup only
// happens when pending hits zero. Always frees the sub-IO object. Returns 0.
int mcplib_await_return(io_pending_proxy_t *p) {
    mcp_await_t *aw;
    lua_State *L = p->thread->L; // use the main VM coroutine for work
    bool cleanup = false;
    bool valid = false; // is response valid to add to the result table.
    bool completing = false;

    // TODO (v2): just push the await ptr into *p?
    lua_rawgeti(L, LUA_REGISTRYINDEX, p->await_ref);
    aw = lua_touserdata(L, -1);
    lua_pop(L, 1); // remove AW object from stack
    assert(aw != NULL);
    P_DEBUG("%s: start [pending: %d]\n", __func__, aw->pending);
    //dump_stack(L);
    aw->pending--;
    assert(aw->pending >= 0);
    // Await not yet satisfied.
    // If wait_for != 0 check for response success
    // if success and wait_for is *now* 0, we complete.
    // add successful response to response table
    // Also, if no wait_for, add response to response table
    // TODO (v2): for GOOD or OK cases, it might be better to return the
    // last object as valid if there are otherwise zero valids?
    // Think we just have to count valids...
    if (aw->type == AWAIT_BACKGROUND) {
        // in the background case, we never want to collect responses.
        if (p->await_background) {
            // found the dummy IO, complete and return conn to worker.
            completing = true;
        }
    } else if (!aw->completed) {
        valid = true; // always collect results unless we are completed.
        if (aw->wait_for > 0) {
            bool is_good = false;
            switch (aw->type) {
                case AWAIT_GOOD:
                    // "good" = protocol-level success that is not a bare
                    // END (i.e. a miss does not count).
                    if (p->client_resp->status == MCMC_OK && p->client_resp->resp.code != MCMC_CODE_END) {
                        is_good = true;
                    }
                    break;
                case AWAIT_ANY:
                    // any response at all counts.
                    is_good = true;
                    break;
                case AWAIT_OK:
                    // protocol-level success, misses included.
                    if (p->client_resp->status == MCMC_OK) {
                        is_good = true;
                    }
                    break;
                case AWAIT_FIRST:
                    if (p->await_first) {
                        is_good = true;
                    } else {
                        // user only wants the first pool's result.
                        valid = false;
                    }
                    break;
                case AWAIT_FASTGOOD:
                    if (p->client_resp->status == MCMC_OK) {
                        // End early on a hit.
                        if (p->client_resp->resp.code != MCMC_CODE_END) {
                            aw->wait_for = 0;
                        } else {
                            is_good = true;
                        }
                    }
                    break;
                case AWAIT_BACKGROUND:
                    // In background mode we don't wait for any response.
                    break;
            }

            if (is_good) {
                aw->wait_for--;
            }

            if (aw->wait_for == 0) {
                completing = true;
            }
        }
    }

    // note that post-completion, we stop gathering responses into the
    // response table... because it's already been returned.
    // So "valid" can only be true if also !completed
    if (aw->pending == 0) {
        if (!aw->completed) {
            // were waiting for all responses.
            completing = true;
        }
        cleanup = true;
        P_DEBUG("%s: pending == 0\n", __func__);
    }

    // a valid response to add to the result table.
    if (valid) {
        P_DEBUG("%s: valid\n", __func__);
        lua_rawgeti(L, LUA_REGISTRYINDEX, aw->restable_ref); // -> 1
        lua_rawgeti(L, LUA_REGISTRYINDEX, p->mcpres_ref); // -> 2
        // couldn't find a table.insert() equivalent; so this is
        // inserting into the length + 1 position manually.
        //dump_stack(L);
        lua_rawseti(L, -2, lua_rawlen(L, 1) + 1); // pops mcpres
        lua_pop(L, 1); // pops restable
    }

    // lose our internal mcpres reference regardless.
    // also tag the elapsed time into the response.
    if (p->mcpres_ref) {
        struct timeval end;
        gettimeofday(&end, NULL);
        // elapsed is in microseconds.
        p->client_resp->elapsed = (end.tv_sec - p->client_resp->start.tv_sec) * 1000000 +
            (end.tv_usec - p->client_resp->start.tv_usec);

        // instructed to generate log_req entries for each failed request,
        // this is useful to do here as these can be asynchronous.
        // NOTE: this may be a temporary feature.
        // NOTE(review): the aw->completed condition means only errors that
        // arrive after completion are logged — confirm this is intentional.
        if (aw->logerr && p->client_resp->status != MCMC_OK && aw->completed) {
            size_t dlen = 0;
            const char *detail = NULL;
            logger *l = p->thread->l;
            // only process logs if someone is listening.
            if (l->eflags & LOG_PROXYREQS) {
                lua_rawgeti(L, LUA_REGISTRYINDEX, aw->req_ref);
                mcp_request_t *rq = lua_touserdata(L, -1);
                lua_pop(L, 1); // references still held, just clearing stack.
                mcp_resp_t *rs = p->client_resp;

                if (aw->detail_ref) {
                    lua_rawgeti(L, LUA_REGISTRYINDEX, aw->detail_ref);
                    detail = luaL_tolstring(L, -1, &dlen);
                    lua_pop(L, 1);
                }

                logger_log(l, LOGGER_PROXY_REQ, NULL, rq->pr.request, rq->pr.reqlen, rs->elapsed, rs->resp.type, rs->resp.code, rs->status, detail, dlen, rs->be_name, rs->be_port);
            }
        }
        luaL_unref(L, LUA_REGISTRYINDEX, p->mcpres_ref);
    }

    // our await_ref is shared, so we don't need to release it.

    if (completing) {
        P_DEBUG("%s: completing\n", __func__);
        assert(p->c->thread == p->thread);
        aw->completed = true;
        // Push the result table onto the parent coroutine's stack as the
        // return value of the original mcp.await() yield, then resume it.
        lua_State *Lc = p->rctx->Lc;
        lua_rawgeti(Lc, LUA_REGISTRYINDEX, aw->restable_ref); // -> 1
        luaL_unref(L, LUA_REGISTRYINDEX, aw->restable_ref);
        proxy_run_rcontext(p->rctx);

        io_queue_t *q = conn_io_queue_get(p->c, p->io_queue_type);
        q->count--;
        if (q->count == 0) {
            // call re-add directly since we're already in the worker thread.
            conn_worker_readd(p->c);
        }
    }

    if (cleanup) {
        P_DEBUG("%s: cleanup [completed: %d]\n", __func__, aw->completed);
        luaL_unref(L, LUA_REGISTRYINDEX, aw->argtable_ref);
        luaL_unref(L, LUA_REGISTRYINDEX, aw->req_ref);
        luaL_unref(L, LUA_REGISTRYINDEX, p->await_ref);
        if (aw->detail_ref) {
            luaL_unref(L, LUA_REGISTRYINDEX, aw->detail_ref);
        }
        WSTAT_DECR(p->thread, proxy_await_active, 1);
    }

    // Just remove anything we could have left on the primary VM stack
    lua_settop(L, 0);

    // always return free this sub-IO object.
    do_cache_free(p->thread->io_cache, p);

    return 0;
}