Commit

update jemalloc

lnkuiper committed Aug 8, 2024
1 parent fa78779 commit 0ca6be0
Showing 33 changed files with 640 additions and 174 deletions.
1 change: 1 addition & 0 deletions extension/jemalloc/jemalloc/CMakeLists.txt
@@ -5,6 +5,7 @@ set(JEMALLOC_C_FILES
src/arena.c
src/background_thread.c
src/base.c
src/batcher.c
src/bin.c
src/bin_info.c
src/bitmap.c
4 changes: 2 additions & 2 deletions extension/jemalloc/jemalloc/README.md
@@ -65,7 +65,7 @@ Add this to `jemalloc.h`:
We also supply our own config string in `jemalloc.c`.
Define this just after the `#include`s.
```c++
-#define JE_MALLOC_CONF_BUFFER_SIZE 200;
+#define JE_MALLOC_CONF_BUFFER_SIZE 200
char JE_MALLOC_CONF_BUFFER[JE_MALLOC_CONF_BUFFER_SIZE];
```
This is what `jemalloc_constructor` in `jemalloc.c` should look like:
@@ -79,7 +79,7 @@ jemalloc_constructor(void) {
bgt_count = 1;
}
// decay is in ms
-unsigned long long decay = DUCKDB_DECAY_DELAY * 1000;
+unsigned long long decay = DUCKDB_JEMALLOC_DECAY * 1000;
#ifdef DEBUG
snprintf(JE_MALLOC_CONF_BUFFER, JE_MALLOC_CONF_BUFFER_SIZE, "junk:true,oversize_threshold:268435456,dirty_decay_ms:%llu,muzzy_decay_ms:%llu,narenas:%llu,max_background_threads:%llu", decay, decay, cpu_count, bgt_count);
#else
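For reference, a sketch of how the complete constructor plausibly fits together. Only the `DEBUG` branch, the decay computation, and the two buffer macros come from the snippet above; the `cpu_count`/`bgt_count` derivation, the release-mode option string, and the final assignment to the prefixed `malloc_conf` pointer are assumptions added for illustration.

```c
/*
 * Illustrative sketch only -- not the verbatim DuckDB patch. Anything not
 * shown in the README snippet above is an assumption.
 */
JEMALLOC_ATTR(constructor)
static void
jemalloc_constructor(void) {
	unsigned long long cpu_count = 8;               /* assumption: detected CPU count */
	unsigned long long bgt_count = cpu_count / 16;  /* assumption: heuristic */
	if (bgt_count == 0) {
		bgt_count = 1;
	}
	/* decay is in ms */
	unsigned long long decay = DUCKDB_JEMALLOC_DECAY * 1000;
#ifdef DEBUG
	snprintf(JE_MALLOC_CONF_BUFFER, JE_MALLOC_CONF_BUFFER_SIZE,
	    "junk:true,oversize_threshold:268435456,dirty_decay_ms:%llu,"
	    "muzzy_decay_ms:%llu,narenas:%llu,max_background_threads:%llu",
	    decay, decay, cpu_count, bgt_count);
#else
	/* assumption: release builds drop junk:true but keep the other options */
	snprintf(JE_MALLOC_CONF_BUFFER, JE_MALLOC_CONF_BUFFER_SIZE,
	    "oversize_threshold:268435456,dirty_decay_ms:%llu,"
	    "muzzy_decay_ms:%llu,narenas:%llu,max_background_threads:%llu",
	    decay, decay, cpu_count, bgt_count);
#endif
	/* assumption: point jemalloc's config string at the buffer before init */
	je_malloc_conf = JE_MALLOC_CONF_BUFFER;
	malloc_init();
}
```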
@@ -103,7 +103,6 @@ void arena_nthreads_inc(arena_t *arena, bool internal);
void arena_nthreads_dec(arena_t *arena, bool internal);
arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config);
bool arena_init_huge(arena_t *a0);
-bool arena_is_huge(unsigned arena_ind);
arena_t *arena_choose_huge(tsd_t *tsd);
bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind,
unsigned *binshard);
@@ -563,10 +563,11 @@ arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info,
* stats updates, which happen during finish (this lets running counts get left
* in a register).
*/
-JEMALLOC_ALWAYS_INLINE bool
+JEMALLOC_ALWAYS_INLINE void
arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab,
-void *ptr) {
+void *ptr, edata_t **dalloc_slabs, unsigned ndalloc_slabs,
+unsigned *dalloc_slabs_count, edata_list_active_t *dalloc_slabs_extra) {
const bin_info_t *bin_info = &bin_infos[binind];
size_t regind = arena_slab_regind(info, binind, slab, ptr);
slab_data_t *slab_data = edata_slab_data_get(slab);
@@ -586,12 +587,17 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
if (nfree == bin_info->nregs) {
arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab,
bin);
-return true;

if (*dalloc_slabs_count < ndalloc_slabs) {
dalloc_slabs[*dalloc_slabs_count] = slab;
(*dalloc_slabs_count)++;
} else {
edata_list_active_append(dalloc_slabs_extra, slab);
}
} else if (nfree == 1 && slab != bin->slabcur) {
arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab,
bin);
}
-return false;
}

JEMALLOC_ALWAYS_INLINE void
@@ -604,10 +610,149 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
}
}

JEMALLOC_ALWAYS_INLINE void
arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
arena_dalloc_bin_locked_info_t *dalloc_bin_info, unsigned binind,
edata_t **dalloc_slabs, unsigned ndalloc_slabs, unsigned *dalloc_count,
edata_list_active_t *dalloc_slabs_extra) {
assert(binind < bin_info_nbatched_sizes);
bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin;
size_t nelems_to_pop = batcher_pop_begin(tsdn,
&batched_bin->remote_frees);

bin_batching_test_mid_pop(nelems_to_pop);
if (nelems_to_pop == BATCHER_NO_IDX) {
malloc_mutex_assert_not_owner(tsdn,
&batched_bin->remote_frees.mtx);
return;
} else {
malloc_mutex_assert_owner(tsdn,
&batched_bin->remote_frees.mtx);
}

size_t npushes = batcher_pop_get_pushes(tsdn,
&batched_bin->remote_frees);
bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX];
for (size_t i = 0; i < nelems_to_pop; i++) {
remote_free_data[i] = batched_bin->remote_free_data[i];
}
batcher_pop_end(tsdn, &batched_bin->remote_frees);

for (size_t i = 0; i < nelems_to_pop; i++) {
arena_dalloc_bin_locked_step(tsdn, arena, bin, dalloc_bin_info,
binind, remote_free_data[i].slab, remote_free_data[i].ptr,
dalloc_slabs, ndalloc_slabs, dalloc_count,
dalloc_slabs_extra);
}

bin->stats.batch_pops++;
bin->stats.batch_pushes += npushes;
bin->stats.batch_pushed_elems += nelems_to_pop;
}

typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t;
struct arena_bin_flush_batch_state_s {
arena_dalloc_bin_locked_info_t info;

/*
* Bin batching is subtle in that there are unusual edge cases in which
* it can trigger the deallocation of more slabs than there were items
* flushed (say, if every original deallocation triggered a slab
* deallocation, and so did every batched one). So we keep a small
backup array for any "extra" slabs, as well as a list to allow a
dynamic number of them exceeding that array.
*/
edata_t *dalloc_slabs[8];
unsigned dalloc_slab_count;
edata_list_active_t dalloc_slabs_extra;
};

JEMALLOC_ALWAYS_INLINE unsigned
arena_bin_batch_get_ndalloc_slabs(unsigned preallocated_slabs) {
if (preallocated_slabs > bin_batching_test_ndalloc_slabs_max) {
return bin_batching_test_ndalloc_slabs_max;
}
return preallocated_slabs;
}

JEMALLOC_ALWAYS_INLINE void
arena_bin_flush_batch_after_lock(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
unsigned binind, arena_bin_flush_batch_state_t *state) {
if (binind >= bin_info_nbatched_sizes) {
return;
}

arena_dalloc_bin_locked_begin(&state->info, binind);
state->dalloc_slab_count = 0;
edata_list_active_init(&state->dalloc_slabs_extra);

unsigned preallocated_slabs = (unsigned)(sizeof(state->dalloc_slabs)
/ sizeof(state->dalloc_slabs[0]));
unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs(
preallocated_slabs);

arena_bin_flush_batch_impl(tsdn, arena, bin, &state->info, binind,
state->dalloc_slabs, ndalloc_slabs,
&state->dalloc_slab_count, &state->dalloc_slabs_extra);
}

JEMALLOC_ALWAYS_INLINE void
arena_bin_flush_batch_before_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
unsigned binind, arena_bin_flush_batch_state_t *state) {
if (binind >= bin_info_nbatched_sizes) {
return;
}

arena_dalloc_bin_locked_finish(tsdn, arena, bin, &state->info);
}

static inline bool
arena_bin_has_batch(szind_t binind) {
return binind < bin_info_nbatched_sizes;
}

JEMALLOC_ALWAYS_INLINE void
arena_bin_flush_batch_after_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
unsigned binind, arena_bin_flush_batch_state_t *state) {
if (!arena_bin_has_batch(binind)) {
return;
}
/*
* The initialization of dalloc_slabs_extra is guarded by an
* arena_bin_has_batch check higher up the stack. But the clang
* analyzer forgets this down the stack, triggering a spurious error
* reported here.
*/
JEMALLOC_CLANG_ANALYZER_SUPPRESS {
bin_batching_test_after_unlock(state->dalloc_slab_count,
edata_list_active_empty(&state->dalloc_slabs_extra));
}
for (unsigned i = 0; i < state->dalloc_slab_count; i++) {
edata_t *slab = state->dalloc_slabs[i];
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
}
while (!edata_list_active_empty(&state->dalloc_slabs_extra)) {
edata_t *slab = edata_list_active_first(
&state->dalloc_slabs_extra);
edata_list_active_remove(&state->dalloc_slabs_extra, slab);
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
}
}

static inline bin_t *
arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]);
-return shard0 + binshard;
bin_t *ret;
if (arena_bin_has_batch(binind)) {
ret = (bin_t *)((bin_with_batch_t *)shard0 + binshard);
} else {
ret = shard0 + binshard;
}
assert(binind >= SC_NBINS - 1
|| (uintptr_t)ret < (uintptr_t)arena
+ arena_bin_offsets[binind + 1]);

return ret;
}

#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
@@ -104,7 +104,7 @@ struct arena_s {
JEMALLOC_WARN_ON_USAGE("Do not use this field directly. "
"Use `arena_get_bin` instead.")
JEMALLOC_ALIGNED(CACHELINE)
-bin_t all_bins[0];
+bin_with_batch_t all_bins[0];
};

#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */
46 changes: 46 additions & 0 deletions extension/jemalloc/jemalloc/include/jemalloc/internal/batcher.h
@@ -0,0 +1,46 @@
#ifndef JEMALLOC_INTERNAL_BATCHER_H
#define JEMALLOC_INTERNAL_BATCHER_H

#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex.h"

#define BATCHER_NO_IDX ((size_t)-1)

typedef struct batcher_s batcher_t;
struct batcher_s {
/*
* Optimize for locality -- nelems_max and nelems are always touched
together, along with the front of the mutex. The end of the mutex is
* only touched if there's contention.
*/
atomic_zu_t nelems;
size_t nelems_max;
size_t npushes;
malloc_mutex_t mtx;
};

void batcher_init(batcher_t *batcher, size_t nelems_max);

/*
* Returns an index (into some user-owned array) to use for pushing, or
* BATCHER_NO_IDX if no index is free. If the former, the caller must call
* batcher_push_end once done.
*/
size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher,
size_t elems_to_push);
void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher);

/*
* Returns the number of items to pop, or BATCHER_NO_IDX if there are none.
* If the former, must be followed by a call to batcher_pop_end.
*/
size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher);
size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher);
void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher);

void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher);
void batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher);
void batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher);

#endif /* JEMALLOC_INTERNAL_BATCHER_H */
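The comments in this header describe a reserve/fill/publish protocol on the push side and a begin/consume/end protocol on the pop side, with element storage owned by the caller. A minimal usage sketch under that reading follows; the `example_*` type, array, and wrapper functions are hypothetical, and only the `batcher_*` calls and `BATCHER_NO_IDX` come from this header.

```c
#include "jemalloc/internal/batcher.h"

#define EXAMPLE_BATCH_MAX 16

/* Hypothetical caller-owned storage paired with a batcher. */
typedef struct {
	batcher_t batcher;
	void *items[EXAMPLE_BATCH_MAX];
} example_queue_t;

static void
example_init(example_queue_t *q) {
	batcher_init(&q->batcher, EXAMPLE_BATCH_MAX);
}

/* Push side: reserve a slot, fill it, publish. Returns true if the batch is full. */
static bool
example_push(tsdn_t *tsdn, example_queue_t *q, void *item) {
	size_t idx = batcher_push_begin(tsdn, &q->batcher, 1);
	if (idx == BATCHER_NO_IDX) {
		return true;
	}
	q->items[idx] = item;
	batcher_push_end(tsdn, &q->batcher);
	return false;
}

/* Pop side: copy out everything pushed so far, then release the batcher. */
static size_t
example_pop_all(tsdn_t *tsdn, example_queue_t *q,
    void *out[EXAMPLE_BATCH_MAX]) {
	size_t nelems = batcher_pop_begin(tsdn, &q->batcher);
	if (nelems == BATCHER_NO_IDX) {
		return 0;
	}
	for (size_t i = 0; i < nelems; i++) {
		out[i] = q->items[i];
	}
	batcher_pop_end(tsdn, &q->batcher);
	return nelems;
}
```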
74 changes: 70 additions & 4 deletions extension/jemalloc/jemalloc/include/jemalloc/internal/bin.h
@@ -2,12 +2,60 @@
#define JEMALLOC_INTERNAL_BIN_H

#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/batcher.h"
#include "jemalloc/internal/bin_stats.h"
#include "jemalloc/internal/bin_types.h"
#include "jemalloc/internal/edata.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sc.h"

#define BIN_REMOTE_FREE_ELEMS_MAX 16

#ifdef JEMALLOC_JET
extern void (*bin_batching_test_after_push_hook)(size_t idx);
extern void (*bin_batching_test_mid_pop_hook)(size_t elems_to_pop);
extern void (*bin_batching_test_after_unlock_hook)(unsigned slab_dalloc_count,
bool list_empty);
#endif

#ifdef JEMALLOC_JET
extern unsigned bin_batching_test_ndalloc_slabs_max;
#else
static const unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1;
#endif

JEMALLOC_ALWAYS_INLINE void
bin_batching_test_after_push(size_t idx) {
(void)idx;
#ifdef JEMALLOC_JET
if (bin_batching_test_after_push_hook != NULL) {
bin_batching_test_after_push_hook(idx);
}
#endif
}

JEMALLOC_ALWAYS_INLINE void
bin_batching_test_mid_pop(size_t elems_to_pop) {
(void)elems_to_pop;
#ifdef JEMALLOC_JET
if (bin_batching_test_mid_pop_hook != NULL) {
bin_batching_test_mid_pop_hook(elems_to_pop);
}
#endif
}

JEMALLOC_ALWAYS_INLINE void
bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) {
(void)slab_dalloc_count;
(void)list_empty;
#ifdef JEMALLOC_JET
if (bin_batching_test_after_unlock_hook != NULL) {
bin_batching_test_after_unlock_hook(slab_dalloc_count,
list_empty);
}
#endif
}

/*
* A bin contains a set of extents that are currently being used for slab
* allocations.
@@ -42,6 +90,19 @@ struct bin_s {
edata_list_active_t slabs_full;
};

typedef struct bin_remote_free_data_s bin_remote_free_data_t;
struct bin_remote_free_data_s {
void *ptr;
edata_t *slab;
};

typedef struct bin_with_batch_s bin_with_batch_t;
struct bin_with_batch_s {
bin_t bin;
batcher_t remote_frees;
bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX];
};
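Tying the two together, here is a hedged illustration (not code from this commit) of how a producer could queue one remote free into a `bin_with_batch_t` via the batcher protocol, mirroring the consumer loop in `arena_bin_flush_batch_impl`; the function name, fallback behavior, and hook placement are assumptions.

```c
/*
 * Illustration only: enqueue one remote free into the bin's batch. When the
 * batch is full (BATCHER_NO_IDX), the real caller would fall back to the
 * ordinary locked deallocation path.
 */
static bool
bin_remote_free_push_example(tsdn_t *tsdn, bin_with_batch_t *batched_bin,
    void *ptr, edata_t *slab) {
	size_t idx = batcher_push_begin(tsdn, &batched_bin->remote_frees, 1);
	if (idx == BATCHER_NO_IDX) {
		return true;
	}
	batched_bin->remote_free_data[idx].ptr = ptr;
	batched_bin->remote_free_data[idx].slab = slab;
	batcher_push_end(tsdn, &batched_bin->remote_frees);
	bin_batching_test_after_push(idx);  /* test hook; placement is an assumption */
	return false;
}
```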

/* A set of sharded bins of the same size class. */
typedef struct bins_s bins_t;
struct bins_s {
@@ -54,12 +115,12 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size,
size_t end_size, size_t nshards);

/* Initializes a bin to empty. Returns true on error. */
-bool bin_init(bin_t *bin);
+bool bin_init(bin_t *bin, unsigned binind);

/* Forking. */
-void bin_prefork(tsdn_t *tsdn, bin_t *bin);
-void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
-void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
+void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch);
+void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch);
+void bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch);

/* Stats. */
static inline void
@@ -77,6 +138,11 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) {
stats->reslabs += bin->stats.reslabs;
stats->curslabs += bin->stats.curslabs;
stats->nonfull_slabs += bin->stats.nonfull_slabs;

stats->batch_failed_pushes += bin->stats.batch_failed_pushes;
stats->batch_pushes += bin->stats.batch_pushes;
stats->batch_pushed_elems += bin->stats.batch_pushed_elems;

malloc_mutex_unlock(tsdn, &bin->lock);
}

(Remaining changed files not shown.)
